From a861ed411a359b7cb2b58d642e4a7f2fdcf92057 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 20 Sep 2024 11:22:03 +0100 Subject: [PATCH] [VPlan] Add initial loop-invariant code motion transform. (#107894) Add initial transform to move out loop-invariant recipes. This also helps to fix a divergence between legacy and VPlan-based cost model due to legacy using ScalarEvolution::isLoopInvariant in some cases. Fixes https://github.com/llvm/llvm-project/issues/107501. PR: https://github.com/llvm/llvm-project/pull/107894 --- .../Transforms/Vectorize/LoopVectorize.cpp | 3 +- .../Transforms/Vectorize/VPlanTransforms.cpp | 36 ++ .../AArch64/conditional-branches-cost.ll | 295 ++++--------- .../AArch64/divs-with-scalable-vfs.ll | 4 +- .../extractvalue-no-scalarization-required.ll | 54 +-- .../AArch64/force-target-instruction-cost.ll | 212 ++++++++- .../LoopVectorize/AArch64/induction-costs.ll | 2 +- .../AArch64/pr60831-sve-inv-store-crash.ll | 2 +- .../AArch64/reduction-recurrence-costs-sve.ll | 40 +- .../LoopVectorize/AArch64/store-costs-sve.ll | 6 +- .../AArch64/sve-cond-inv-loads.ll | 6 +- .../LoopVectorize/AArch64/sve-inv-loads.ll | 4 +- .../AArch64/sve-widen-extractvalue.ll | 2 +- .../LoopVectorize/RISCV/dead-ops-cost.ll | 6 +- .../Transforms/LoopVectorize/RISCV/divrem.ll | 12 +- .../Transforms/LoopVectorize/RISCV/pr88802.ll | 4 +- .../truncate-to-minimal-bitwidth-cost.ll | 4 +- .../LoopVectorize/SystemZ/pr47665.ll | 2 +- .../LoopVectorize/X86/cost-model.ll | 143 +++++-- .../X86/divs-with-tail-folding.ll | 2 +- .../X86/invariant-load-gather.ll | 4 +- .../LoopVectorize/X86/uniform_mem_op.ll | 6 +- .../LoopVectorize/blend-in-header.ll | 2 +- ...-order-recurrence-sink-replicate-region.ll | 6 +- .../LoopVectorize/first-order-recurrence.ll | 402 +++++++++--------- .../pr55167-fold-tail-live-out.ll | 6 +- .../LoopVectorize/reduction-inloop-uf4.ll | 12 +- .../scalable-trunc-min-bitwidth.ll | 2 +- .../Transforms/LoopVectorize/select-cmp.ll | 4 +- .../LoopVectorize/skeleton-lcssa-crash.ll | 2 +- .../LoopVectorize/trunc-extended-icmps.ll | 2 +- .../LoopVectorize/trunc-loads-p16.ll | 2 +- .../Transforms/LoopVectorize/trunc-shifts.ll | 6 +- .../Transforms/LoopVectorize/vector-geps.ll | 6 +- .../version-stride-with-integer-casts.ll | 11 +- .../vplan-sink-scalars-and-merge.ll | 2 +- .../widen-gep-all-indices-invariant.ll | 6 +- 37 files changed, 746 insertions(+), 574 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9f554827a8287d..806dbd5bb7304e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2378,7 +2378,8 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr, AC->registerAssumption(II); // End if-block. - bool IfPredicateInstr = RepRecipe->getParent()->getParent()->isReplicator(); + VPRegionBlock *Parent = RepRecipe->getParent()->getParent(); + bool IfPredicateInstr = Parent ? Parent->isReplicator() : false; if (IfPredicateInstr) PredicatedInstructions.push_back(Cloned); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index edcd7d26e60daa..06ac15f52fe137 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -971,6 +971,41 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return R.getVPSingleValue()->replaceAllUsesWith(A); } +/// Move loop-invariant recipes out of the vector loop region in \p Plan. +static void licm(VPlan &Plan) { + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPBasicBlock *Preheader = + cast(LoopRegion->getSinglePredecessor()); + + // Return true if we do not know how to (mechanically) hoist a given recipe + // out of a loop region. Does not address legality concerns such as aliasing + // or speculation safety. + auto CannotHoistRecipe = [](VPRecipeBase &R) { + // Allocas cannot be hoisted. + auto *RepR = dyn_cast(&R); + return RepR && RepR->getOpcode() == Instruction::Alloca; + }; + + // Hoist any loop invariant recipes from the vector loop region to the + // preheader. Preform a shallow traversal of the vector loop region, to + // exclude recipes in replicate regions. + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( + vp_depth_first_shallow(LoopRegion->getEntry()))) { + for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { + if (CannotHoistRecipe(R)) + continue; + // TODO: Relax checks in the future, e.g. we could also hoist reads, if + // their memory location is not modified in the vector loop. + if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() || + any_of(R.operands(), [](VPValue *Op) { + return !Op->isDefinedOutsideLoopRegions(); + })) + continue; + R.moveBefore(*Preheader, Preheader->end()); + } + } +} + /// Try to simplify the recipes in \p Plan. static void simplifyRecipes(VPlan &Plan) { ReversePostOrderTraversal> RPOT( @@ -1123,6 +1158,7 @@ void VPlanTransforms::optimize(VPlan &Plan) { removeRedundantInductionCasts(Plan); simplifyRecipes(Plan); + licm(Plan); legalizeAndOptimizeInductions(Plan); removeDeadRecipes(Plan); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 9910be7224674c..04219fcc4de576 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -9,98 +9,9 @@ define void @invar_cond_gep_store(ptr %dst, i32 %0) { ; DEFAULT-LABEL: define void @invar_cond_gep_store( ; DEFAULT-SAME: ptr [[DST:%.*]], i32 [[TMP0:%.*]]) { ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; DEFAULT: vector.ph: -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] -; DEFAULT: vector.body: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ] -; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; DEFAULT-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; DEFAULT-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; DEFAULT-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; DEFAULT: pred.store.if: -; DEFAULT-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 -; DEFAULT-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 1 -; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP5]] -; DEFAULT-NEXT: store i32 1, ptr [[TMP6]], align 4 -; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE]] -; DEFAULT: pred.store.continue: -; DEFAULT-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 -; DEFAULT-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] -; DEFAULT: pred.store.if1: -; DEFAULT-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 1 -; DEFAULT-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1 -; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP9]] -; DEFAULT-NEXT: store i32 1, ptr [[TMP10]], align 4 -; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE2]] -; DEFAULT: pred.store.continue2: -; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 -; DEFAULT-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] -; DEFAULT: pred.store.if3: -; DEFAULT-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 2 -; DEFAULT-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 1 -; DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP13]] -; DEFAULT-NEXT: store i32 1, ptr [[TMP14]], align 4 -; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE4]] -; DEFAULT: pred.store.continue4: -; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; DEFAULT-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] -; DEFAULT: pred.store.if5: -; DEFAULT-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 3 -; DEFAULT-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 1 -; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP17]] -; DEFAULT-NEXT: store i32 1, ptr [[TMP18]], align 4 -; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE6]] -; DEFAULT: pred.store.continue6: -; DEFAULT-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 -; DEFAULT-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; DEFAULT: pred.store.if7: -; DEFAULT-NEXT: [[TMP20:%.*]] = add i64 [[OFFSET_IDX]], 4 -; DEFAULT-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 1 -; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP21]] -; DEFAULT-NEXT: store i32 1, ptr [[TMP22]], align 4 -; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE8]] -; DEFAULT: pred.store.continue8: -; DEFAULT-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 -; DEFAULT-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] -; DEFAULT: pred.store.if9: -; DEFAULT-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], 5 -; DEFAULT-NEXT: [[TMP25:%.*]] = add i64 [[TMP24]], 1 -; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP25]] -; DEFAULT-NEXT: store i32 1, ptr [[TMP26]], align 4 -; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE10]] -; DEFAULT: pred.store.continue10: -; DEFAULT-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 -; DEFAULT-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] -; DEFAULT: pred.store.if11: -; DEFAULT-NEXT: [[TMP28:%.*]] = add i64 [[OFFSET_IDX]], 6 -; DEFAULT-NEXT: [[TMP29:%.*]] = add i64 [[TMP28]], 1 -; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP29]] -; DEFAULT-NEXT: store i32 1, ptr [[TMP30]], align 4 -; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE12]] -; DEFAULT: pred.store.continue12: -; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 -; DEFAULT-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14]] -; DEFAULT: pred.store.if13: -; DEFAULT-NEXT: [[TMP32:%.*]] = add i64 [[OFFSET_IDX]], 7 -; DEFAULT-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], 1 -; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP33]] -; DEFAULT-NEXT: store i32 1, ptr [[TMP34]], align 4 -; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE14]] -; DEFAULT: pred.store.continue14: -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; DEFAULT-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 -; DEFAULT-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; DEFAULT: middle.block: -; DEFAULT-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] -; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 97, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: br label [[LOOP_HEADER:%.*]] ; DEFAULT: loop.header: -; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; DEFAULT-NEXT: [[CMP9:%.*]] = icmp eq i32 [[TMP0]], 0 ; DEFAULT-NEXT: br i1 [[CMP9]], label [[THEN:%.*]], label [[LOOP_LATCH]] @@ -110,68 +21,16 @@ define void @invar_cond_gep_store(ptr %dst, i32 %0) { ; DEFAULT-NEXT: br label [[LOOP_LATCH]] ; DEFAULT: loop.latch: ; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 100 -; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_HEADER]] ; DEFAULT: exit: ; DEFAULT-NEXT: ret void ; ; PRED-LABEL: define void @invar_cond_gep_store( ; PRED-SAME: ptr [[DST:%.*]], i32 [[TMP0:%.*]]) { ; PRED-NEXT: entry: -; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; PRED: vector.ph: -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; PRED-NEXT: br label [[VECTOR_BODY:%.*]] -; PRED: vector.body: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] -; PRED-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; PRED-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; PRED-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; PRED-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; PRED: pred.store.if: -; PRED-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 -; PRED-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 1 -; PRED-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP4]] -; PRED-NEXT: store i32 1, ptr [[TMP5]], align 4 -; PRED-NEXT: br label [[PRED_STORE_CONTINUE]] -; PRED: pred.store.continue: -; PRED-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 -; PRED-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] -; PRED: pred.store.if1: -; PRED-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 1 -; PRED-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 1 -; PRED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP8]] -; PRED-NEXT: store i32 1, ptr [[TMP9]], align 4 -; PRED-NEXT: br label [[PRED_STORE_CONTINUE2]] -; PRED: pred.store.continue2: -; PRED-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 -; PRED-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] -; PRED: pred.store.if3: -; PRED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 2 -; PRED-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 1 -; PRED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP12]] -; PRED-NEXT: store i32 1, ptr [[TMP13]], align 4 -; PRED-NEXT: br label [[PRED_STORE_CONTINUE4]] -; PRED: pred.store.continue4: -; PRED-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; PRED-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] -; PRED: pred.store.if5: -; PRED-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 3 -; PRED-NEXT: [[TMP16:%.*]] = add i64 [[TMP15]], 1 -; PRED-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP16]] -; PRED-NEXT: store i32 1, ptr [[TMP17]], align 4 -; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]] -; PRED: pred.store.continue6: -; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; PRED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 -; PRED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 101, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] ; PRED-NEXT: br label [[LOOP_HEADER:%.*]] ; PRED: loop.header: -; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; PRED-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; PRED-NEXT: [[CMP9:%.*]] = icmp eq i32 [[TMP0]], 0 ; PRED-NEXT: br i1 [[CMP9]], label [[THEN:%.*]], label [[LOOP_LATCH]] @@ -181,7 +40,7 @@ define void @invar_cond_gep_store(ptr %dst, i32 %0) { ; PRED-NEXT: br label [[LOOP_LATCH]] ; PRED: loop.latch: ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 100 -; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; PRED-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_HEADER]] ; PRED: exit: ; PRED-NEXT: ret void ; @@ -260,7 +119,7 @@ define void @loop_dependent_cond(ptr %src, ptr noalias %dst, i64 %N) { ; DEFAULT: pred.store.continue7: ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; DEFAULT-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_END123:%.*]], label [[SCALAR_PH]] @@ -280,7 +139,7 @@ define void @loop_dependent_cond(ptr %src, ptr noalias %dst, i64 %N) { ; DEFAULT: loop.latch: ; DEFAULT-NEXT: [[IV_NEXT176]] = add i64 [[IV175]], 1 ; DEFAULT-NEXT: [[EXITCOND180_NOT:%.*]] = icmp eq i64 [[IV175]], [[N]] -; DEFAULT-NEXT: br i1 [[EXITCOND180_NOT]], label [[FOR_END123]], label [[FOR_BODY112]], !llvm.loop [[LOOP5:![0-9]+]] +; DEFAULT-NEXT: br i1 [[EXITCOND180_NOT]], label [[FOR_END123]], label [[FOR_BODY112]], !llvm.loop [[LOOP3:![0-9]+]] ; DEFAULT: exit: ; DEFAULT-NEXT: ret void ; @@ -494,7 +353,7 @@ define void @latch_branch_cost(ptr %dst) { ; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; DEFAULT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 -; DEFAULT-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: br i1 false, label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; DEFAULT: vec.epilog.iter.check: @@ -510,7 +369,7 @@ define void @latch_branch_cost(ptr %dst) { ; DEFAULT-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP9]], align 1 ; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4 ; DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 100 -; DEFAULT-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: vec.epilog.middle.block: ; DEFAULT-NEXT: br i1 true, label [[FOR_END]], label [[SCALAR_PH]] ; DEFAULT: vec.epilog.scalar.ph: @@ -522,7 +381,7 @@ define void @latch_branch_cost(ptr %dst) { ; DEFAULT-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1 ; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; DEFAULT: exit: ; DEFAULT-NEXT: ret void ; @@ -603,7 +462,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], ; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104 -; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: @@ -615,7 +474,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: store i8 0, ptr [[GEP]], align 1 ; PRED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[IV]], 1 ; PRED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100 -; PRED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; PRED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; PRED: exit: ; PRED-NEXT: ret void ; @@ -693,24 +552,24 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; DEFAULT-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META9:![0-9]+]] +; DEFAULT-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META7:![0-9]+]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector [[BROADCAST_SPLATINSERT28]], poison, zeroinitializer -; DEFAULT-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META12:![0-9]+]] +; DEFAULT-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META10:![0-9]+]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP12]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; DEFAULT-NEXT: [[TMP13:%.*]] = or [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT29]] -; DEFAULT-NEXT: [[TMP14:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META14:![0-9]+]] +; DEFAULT-NEXT: [[TMP14:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META12:![0-9]+]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement poison, i32 [[TMP14]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector [[BROADCAST_SPLATINSERT30]], poison, zeroinitializer ; DEFAULT-NEXT: [[TMP15:%.*]] = icmp ugt [[BROADCAST_SPLAT31]], [[TMP13]] ; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[TMP10]] -; DEFAULT-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP13]], [[BROADCAST_SPLAT33]], i32 4, [[TMP15]]), !alias.scope [[META16:![0-9]+]], !noalias [[META18:![0-9]+]] +; DEFAULT-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP13]], [[BROADCAST_SPLAT33]], i32 4, [[TMP15]]), !alias.scope [[META14:![0-9]+]], !noalias [[META16:![0-9]+]] ; DEFAULT-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0 -; DEFAULT-NEXT: call void @llvm.masked.store.nxv4i32.p0( zeroinitializer, ptr [[TMP17]], i32 4, [[TMP15]]), !alias.scope [[META20:![0-9]+]], !noalias [[META21:![0-9]+]] +; DEFAULT-NEXT: call void @llvm.masked.store.nxv4i32.p0( zeroinitializer, ptr [[TMP17]], i32 4, [[TMP15]]), !alias.scope [[META18:![0-9]+]], !noalias [[META19:![0-9]+]] ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -733,7 +592,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; DEFAULT: loop.latch: ; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; DEFAULT-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV]], [[N]] -; DEFAULT-NEXT: br i1 [[C_1]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP23:![0-9]+]] +; DEFAULT-NEXT: br i1 [[C_1]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]] ; DEFAULT: exit: ; DEFAULT-NEXT: ret i32 0 ; @@ -800,27 +659,27 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; PRED-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0 -; PRED-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META6:![0-9]+]] +; PRED-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META4:![0-9]+]] ; PRED-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement poison, i32 [[TMP16]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector [[BROADCAST_SPLATINSERT28]], poison, zeroinitializer -; PRED-NEXT: [[TMP17:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META9:![0-9]+]] +; PRED-NEXT: [[TMP17:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META7:![0-9]+]] ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP17]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PRED-NEXT: [[TMP18:%.*]] = or [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT29]] -; PRED-NEXT: [[TMP19:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META11:![0-9]+]] +; PRED-NEXT: [[TMP19:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META9:![0-9]+]] ; PRED-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement poison, i32 [[TMP19]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector [[BROADCAST_SPLATINSERT30]], poison, zeroinitializer ; PRED-NEXT: [[TMP20:%.*]] = icmp ugt [[BROADCAST_SPLAT31]], [[TMP18]] ; PRED-NEXT: [[TMP21:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP20]], zeroinitializer ; PRED-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[D]], i64 [[TMP15]] -; PRED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP18]], [[BROADCAST_SPLAT33]], i32 4, [[TMP21]]), !alias.scope [[META13:![0-9]+]], !noalias [[META15:![0-9]+]] +; PRED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP18]], [[BROADCAST_SPLAT33]], i32 4, [[TMP21]]), !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] ; PRED-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0 -; PRED-NEXT: call void @llvm.masked.store.nxv4i32.p0( zeroinitializer, ptr [[TMP23]], i32 4, [[TMP21]]), !alias.scope [[META17:![0-9]+]], !noalias [[META18:![0-9]+]] +; PRED-NEXT: call void @llvm.masked.store.nxv4i32.p0( zeroinitializer, ptr [[TMP23]], i32 4, [[TMP21]]), !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]] ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP14]]) ; PRED-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; PRED-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 -; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: @@ -842,7 +701,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; PRED: loop.latch: ; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; PRED-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV]], [[N]] -; PRED-NEXT: br i1 [[C_1]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP20:![0-9]+]] +; PRED-NEXT: br i1 [[C_1]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP18:![0-9]+]] ; PRED: exit: ; PRED-NEXT: ret i32 0 ; @@ -895,7 +754,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[TMP4]], align 8 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; DEFAULT-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: @@ -915,7 +774,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 8 ; DEFAULT-NEXT: [[IV_CLAMP:%.*]] = and i64 [[IV]], 4294967294 ; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_CLAMP]], 512 -; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] +; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] ; DEFAULT: exit: ; DEFAULT-NEXT: ret void ; @@ -959,7 +818,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]]) ; PRED-NEXT: [[TMP16:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; PRED-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 -; PRED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; PRED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: @@ -979,7 +838,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 8 ; PRED-NEXT: [[IV_CLAMP:%.*]] = and i64 [[IV]], 4294967294 ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_CLAMP]], 512 -; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]] +; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP20:![0-9]+]] ; PRED: exit: ; PRED-NEXT: ret void ; @@ -1089,7 +948,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT: pred.store.continue14: ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], ; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 -; DEFAULT-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; DEFAULT-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: @@ -1102,7 +961,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1 ; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 7 -; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] +; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] ; DEFAULT: exit: ; DEFAULT-NEXT: ret void ; @@ -1191,7 +1050,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED: pred.store.continue14: ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 -; PRED-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; PRED-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: @@ -1204,7 +1063,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1 ; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 7 -; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP24:![0-9]+]] +; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]] ; PRED: exit: ; PRED-NEXT: ret void ; @@ -1405,7 +1264,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], ; DEFAULT-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -1436,7 +1295,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT: loop.latch: ; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP29:![0-9]+]] +; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP27:![0-9]+]] ; DEFAULT: exit: ; DEFAULT-NEXT: ret void ; @@ -1628,7 +1487,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: [[TMP84:%.*]] = xor <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], ; PRED-NEXT: [[TMP85:%.*]] = extractelement <8 x i1> [[TMP84]], i32 0 -; PRED-NEXT: br i1 [[TMP85]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; PRED-NEXT: br i1 [[TMP85]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: @@ -1658,7 +1517,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED: loop.latch: ; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP26:![0-9]+]] +; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP24:![0-9]+]] ; PRED: exit: ; PRED-NEXT: ret void ; @@ -1741,7 +1600,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: @@ -1757,7 +1616,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; DEFAULT-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32 ; DEFAULT-NEXT: store i32 [[T]], ptr [[DST]], align 4 ; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 21 -; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP31:![0-9]+]] +; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP29:![0-9]+]] ; DEFAULT: exit: ; DEFAULT-NEXT: ret void ; @@ -1804,7 +1663,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; PRED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; PRED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: @@ -1820,7 +1679,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; PRED-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32 ; PRED-NEXT: store i32 [[T]], ptr [[DST]], align 4 ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 21 -; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP28:![0-9]+]] +; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP26:![0-9]+]] ; PRED: exit: ; PRED-NEXT: ret void ; @@ -1857,61 +1716,57 @@ attributes #2 = { vscale_range(2,2) "target-cpu"="neoverse-512tvb" } ; DEFAULT: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; DEFAULT: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} ; DEFAULT: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; DEFAULT: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -; DEFAULT: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; DEFAULT: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]} -; DEFAULT: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]} -; DEFAULT: [[META9]] = !{[[META10:![0-9]+]]} -; DEFAULT: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]]} -; DEFAULT: [[META11]] = distinct !{[[META11]], !"LVerDomain"} +; DEFAULT: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} +; DEFAULT: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]} +; DEFAULT: [[META7]] = !{[[META8:![0-9]+]]} +; DEFAULT: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]]} +; DEFAULT: [[META9]] = distinct !{[[META9]], !"LVerDomain"} +; DEFAULT: [[META10]] = !{[[META11:![0-9]+]]} +; DEFAULT: [[META11]] = distinct !{[[META11]], [[META9]]} ; DEFAULT: [[META12]] = !{[[META13:![0-9]+]]} -; DEFAULT: [[META13]] = distinct !{[[META13]], [[META11]]} +; DEFAULT: [[META13]] = distinct !{[[META13]], [[META9]]} ; DEFAULT: [[META14]] = !{[[META15:![0-9]+]]} -; DEFAULT: [[META15]] = distinct !{[[META15]], [[META11]]} -; DEFAULT: [[META16]] = !{[[META17:![0-9]+]]} -; DEFAULT: [[META17]] = distinct !{[[META17]], [[META11]]} -; DEFAULT: [[META18]] = !{[[META19:![0-9]+]], [[META10]], [[META13]], [[META15]]} -; DEFAULT: [[META19]] = distinct !{[[META19]], [[META11]]} -; DEFAULT: [[META20]] = !{[[META19]]} -; DEFAULT: [[META21]] = !{[[META10]], [[META13]], [[META15]]} +; DEFAULT: [[META15]] = distinct !{[[META15]], [[META9]]} +; DEFAULT: [[META16]] = !{[[META17:![0-9]+]], [[META8]], [[META11]], [[META13]]} +; DEFAULT: [[META17]] = distinct !{[[META17]], [[META9]]} +; DEFAULT: [[META18]] = !{[[META17]]} +; DEFAULT: [[META19]] = !{[[META8]], [[META11]], [[META13]]} +; DEFAULT: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]} +; DEFAULT: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]]} ; DEFAULT: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]} -; DEFAULT: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]]} +; DEFAULT: [[LOOP23]] = distinct !{[[LOOP23]], [[META2]], [[META1]]} ; DEFAULT: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]} ; DEFAULT: [[LOOP25]] = distinct !{[[LOOP25]], [[META2]], [[META1]]} ; DEFAULT: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]} -; DEFAULT: [[LOOP27]] = distinct !{[[LOOP27]], [[META2]], [[META1]]} +; DEFAULT: [[LOOP27]] = distinct !{[[LOOP27]], [[META1]]} ; DEFAULT: [[LOOP28]] = distinct !{[[LOOP28]], [[META1]], [[META2]]} -; DEFAULT: [[LOOP29]] = distinct !{[[LOOP29]], [[META1]]} -; DEFAULT: [[LOOP30]] = distinct !{[[LOOP30]], [[META1]], [[META2]]} -; DEFAULT: [[LOOP31]] = distinct !{[[LOOP31]], [[META2]], [[META1]]} +; DEFAULT: [[LOOP29]] = distinct !{[[LOOP29]], [[META2]], [[META1]]} ;. ; PRED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; PRED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; PRED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; PRED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; PRED: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; PRED: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -; PRED: [[META6]] = !{[[META7:![0-9]+]]} -; PRED: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]} -; PRED: [[META8]] = distinct !{[[META8]], !"LVerDomain"} +; PRED: [[META4]] = !{[[META5:![0-9]+]]} +; PRED: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]} +; PRED: [[META6]] = distinct !{[[META6]], !"LVerDomain"} +; PRED: [[META7]] = !{[[META8:![0-9]+]]} +; PRED: [[META8]] = distinct !{[[META8]], [[META6]]} ; PRED: [[META9]] = !{[[META10:![0-9]+]]} -; PRED: [[META10]] = distinct !{[[META10]], [[META8]]} +; PRED: [[META10]] = distinct !{[[META10]], [[META6]]} ; PRED: [[META11]] = !{[[META12:![0-9]+]]} -; PRED: [[META12]] = distinct !{[[META12]], [[META8]]} -; PRED: [[META13]] = !{[[META14:![0-9]+]]} -; PRED: [[META14]] = distinct !{[[META14]], [[META8]]} -; PRED: [[META15]] = !{[[META16:![0-9]+]], [[META7]], [[META10]], [[META12]]} -; PRED: [[META16]] = distinct !{[[META16]], [[META8]]} -; PRED: [[META17]] = !{[[META16]]} -; PRED: [[META18]] = !{[[META7]], [[META10]], [[META12]]} +; PRED: [[META12]] = distinct !{[[META12]], [[META6]]} +; PRED: [[META13]] = !{[[META14:![0-9]+]], [[META5]], [[META8]], [[META10]]} +; PRED: [[META14]] = distinct !{[[META14]], [[META6]]} +; PRED: [[META15]] = !{[[META14]]} +; PRED: [[META16]] = !{[[META5]], [[META8]], [[META10]]} +; PRED: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]} +; PRED: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]]} ; PRED: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]} -; PRED: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]]} +; PRED: [[LOOP20]] = distinct !{[[LOOP20]], [[META2]], [[META1]]} ; PRED: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]} ; PRED: [[LOOP22]] = distinct !{[[LOOP22]], [[META2]], [[META1]]} ; PRED: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]} -; PRED: [[LOOP24]] = distinct !{[[LOOP24]], [[META2]], [[META1]]} +; PRED: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]]} ; PRED: [[LOOP25]] = distinct !{[[LOOP25]], [[META1]], [[META2]]} -; PRED: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]]} -; PRED: [[LOOP27]] = distinct !{[[LOOP27]], [[META1]], [[META2]]} -; PRED: [[LOOP28]] = distinct !{[[LOOP28]], [[META2]], [[META1]]} +; PRED: [[LOOP26]] = distinct !{[[LOOP26]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 3ffff2f813fc04..5d163a97e3be3d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -26,6 +26,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 +; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]] +; CHECK-NEXT: [[TMP19:%.*]] = sdiv i64 [[M]], [[CONV6]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -35,8 +37,6 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], 0 ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 1 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]] -; CHECK-NEXT: [[TMP19:%.*]] = sdiv i64 [[M]], [[CONV6]] ; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP18]] to i32 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP19]] to i32 ; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP18]], [[CONV61]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll index 3897218510a805..4fd9604dd6d257 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -17,22 +17,23 @@ ; Check that the extractvalue operands are actually free in vector code. -; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph -; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; FORCED-NEXT: %0 = add i32 %index, 0 -; FORCED-NEXT: %1 = extractvalue { i64, i64 } %sv, 0 -; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i64> poison, i64 %1, i64 0 +; FORCED: [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0 +; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0 ; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: %2 = extractvalue { i64, i64 } %sv, 1 -; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 %2, i64 0 +; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1 +; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0 ; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: %3 = getelementptr i64, ptr %dst, i32 %0 -; FORCED-NEXT: %4 = add <2 x i64> %broadcast.splat, %broadcast.splat2 -; FORCED-NEXT: %5 = getelementptr i64, ptr %3, i32 0 -; FORCED-NEXT: store <2 x i64> %4, ptr %5, align 4 + +; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph +; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] +; FORCED-NEXT: [[IV_0:%.]] = add i32 %index, 0 +; FORCED-NEXT: [[GEP:%.+]] = getelementptr i64, ptr %dst, i32 [[IV_0]] +; FORCED-NEXT: [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2 +; FORCED-NEXT: [[GEP2:%.+]] = getelementptr i64, ptr [[GEP]], i32 0 +; FORCED-NEXT: store <2 x i64> [[ADD]], ptr [[GEP2]], align 4 ; FORCED-NEXT: %index.next = add nuw i32 %index, 2 -; FORCED-NEXT: %6 = icmp eq i32 %index.next, 1000 -; FORCED-NEXT: br i1 %6, label %middle.block, label %vector.body +; FORCED-NEXT: [[C:%.+]] = icmp eq i32 %index.next, 1000 +; FORCED-NEXT: br i1 [[C]], label %middle.block, label %vector.body define void @test1(ptr %dst, {i64, i64} %sv) { entry: @@ -66,22 +67,23 @@ declare float @powf(float, float) readnone nounwind ; FORCED-LABEL: define void @test_getVectorCallCost -; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph -; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; FORCED-NEXT: %0 = add i32 %index, 0 -; FORCED-NEXT: %1 = extractvalue { float, float } %sv, 0 -; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x float> poison, float %1, i64 0 +; FORCED: [[E1:%.+]] = extractvalue { float, float } %sv, 0 +; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0 ; FORCED-NEXT: %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: %2 = extractvalue { float, float } %sv, 1 -; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x float> poison, float %2, i64 0 +; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1 +; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0 ; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: %3 = getelementptr float, ptr %dst, i32 %0 -; FORCED-NEXT: %4 = call <2 x float> @llvm.pow.v2f32(<2 x float> %broadcast.splat, <2 x float> %broadcast.splat2) -; FORCED-NEXT: %5 = getelementptr float, ptr %3, i32 0 -; FORCED-NEXT: store <2 x float> %4, ptr %5, align 4 + +; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph +; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] +; FORCED-NEXT: [[IV0:%.+]] = add i32 %index, 0 +; FORCED-NEXT: [[GEP1:%.+]] = getelementptr float, ptr %dst, i32 [[IV0]] +; FORCED-NEXT: [[POW:%.+]] = call <2 x float> @llvm.pow.v2f32(<2 x float> %broadcast.splat, <2 x float> %broadcast.splat2) +; FORCED-NEXT: [[GEP2:%.+]] = getelementptr float, ptr [[GEP1]], i32 0 +; FORCED-NEXT: store <2 x float> [[POW]], ptr [[GEP2]], align 4 ; FORCED-NEXT: %index.next = add nuw i32 %index, 2 -; FORCED-NEXT: %6 = icmp eq i32 %index.next, 1000 -; FORCED-NEXT: br i1 %6, label %middle.block, label %vector.body +; FORCED-NEXT: [[C:%.+]] = icmp eq i32 %index.next, 1000 +; FORCED-NEXT: br i1 [[C]], label %middle.block, label %vector.body define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 9ca6c527540114..651fcf6bedc2d9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -156,9 +156,201 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-LABEL: define void @test_exit_branch_cost( ; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]], i32 [[Y:%.*]], ptr [[DST_1:%.*]], i1 [[C_4:%.*]], ptr [[SRC:%.*]], ptr [[DST_3:%.*]], i1 [[C_3:%.*]], ptr [[DST_2:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST_1]], i64 8 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST_3]], i64 8 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[DST_2]], i64 8 +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 8 +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[BOUND05:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP2]] +; CHECK-NEXT: [[BOUND16:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT7:%.*]] = and i1 [[BOUND05]], [[BOUND16]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT7]] +; CHECK-NEXT: [[BOUND08:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP3]] +; CHECK-NEXT: [[BOUND19:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT10:%.*]] = and i1 [[BOUND08]], [[BOUND19]] +; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT10]] +; CHECK-NEXT: [[BOUND012:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP4]] +; CHECK-NEXT: [[BOUND113:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]] +; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX11]], [[FOUND_CONFLICT14]] +; CHECK-NEXT: [[BOUND016:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP2]] +; CHECK-NEXT: [[BOUND117:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] +; CHECK-NEXT: [[CONFLICT_RDX19:%.*]] = or i1 [[CONFLICT_RDX15]], [[FOUND_CONFLICT18]] +; CHECK-NEXT: [[BOUND020:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP3]] +; CHECK-NEXT: [[BOUND121:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT22:%.*]] = and i1 [[BOUND020]], [[BOUND121]] +; CHECK-NEXT: [[CONFLICT_RDX23:%.*]] = or i1 [[CONFLICT_RDX19]], [[FOUND_CONFLICT22]] +; CHECK-NEXT: [[BOUND024:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP4]] +; CHECK-NEXT: [[BOUND125:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT26:%.*]] = and i1 [[BOUND024]], [[BOUND125]] +; CHECK-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX23]], [[FOUND_CONFLICT26]] +; CHECK-NEXT: [[BOUND028:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP3]] +; CHECK-NEXT: [[BOUND129:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]] +; CHECK-NEXT: [[FOUND_CONFLICT30:%.*]] = and i1 [[BOUND028]], [[BOUND129]] +; CHECK-NEXT: [[CONFLICT_RDX31:%.*]] = or i1 [[CONFLICT_RDX27]], [[FOUND_CONFLICT30]] +; CHECK-NEXT: [[BOUND032:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP4]] +; CHECK-NEXT: [[BOUND133:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP2]] +; CHECK-NEXT: [[FOUND_CONFLICT34:%.*]] = and i1 [[BOUND032]], [[BOUND133]] +; CHECK-NEXT: [[CONFLICT_RDX35:%.*]] = or i1 [[CONFLICT_RDX31]], [[FOUND_CONFLICT34]] +; CHECK-NEXT: [[BOUND036:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP4]] +; CHECK-NEXT: [[BOUND137:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP3]] +; CHECK-NEXT: [[FOUND_CONFLICT38:%.*]] = and i1 [[BOUND036]], [[BOUND137]] +; CHECK-NEXT: [[CONFLICT_RDX39:%.*]] = or i1 [[CONFLICT_RDX35]], [[FOUND_CONFLICT38]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX39]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT40:%.*]] = insertelement <2 x i1> poison, i1 [[C_3]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT41:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT40]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C_4]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[C_4]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT56:%.*]] = insertelement <2 x i1> poison, i1 [[C_4]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT57:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT56]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE74:.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP47]], +; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7:![0-9]+]], !noalias [[META10:![0-9]+]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 +; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF42:.*]], label %[[PRED_STORE_CONTINUE43:.*]] +; CHECK: [[PRED_STORE_IF42]]: +; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE43]] +; CHECK: [[PRED_STORE_CONTINUE43]]: +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF44:.*]], label %[[PRED_STORE_CONTINUE45:.*]] +; CHECK: [[PRED_STORE_IF44]]: +; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE45]] +; CHECK: [[PRED_STORE_CONTINUE45]]: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF46:.*]], label %[[PRED_STORE_CONTINUE47:.*]] +; CHECK: [[PRED_STORE_IF46]]: +; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]] +; CHECK: [[PRED_STORE_CONTINUE47]]: +; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP2]], +; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 +; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF48:.*]], label %[[PRED_STORE_CONTINUE49:.*]] +; CHECK: [[PRED_STORE_IF48]]: +; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE49]] +; CHECK: [[PRED_STORE_CONTINUE49]]: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1 +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]] +; CHECK: [[PRED_STORE_IF50]]: +; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]] +; CHECK: [[PRED_STORE_CONTINUE51]]: +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 +; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53:.*]] +; CHECK: [[PRED_STORE_IF52]]: +; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]] +; CHECK: [[PRED_STORE_CONTINUE53]]: +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1 +; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF54:.*]], label %[[PRED_STORE_CONTINUE55:.*]] +; CHECK: [[PRED_STORE_IF54]]: +; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE55]] +; CHECK: [[PRED_STORE_CONTINUE55]]: +; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP18]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = or <2 x i1> [[TMP47]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <2 x i1> [[TMP1]], [[TMP21]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP20]], <2 x i64> zeroinitializer, <2 x i64> +; CHECK-NEXT: [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP22]], i32 0 +; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF59:.*]], label %[[PRED_STORE_CONTINUE60:.*]] +; CHECK: [[PRED_STORE_IF59]]: +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i64> [[PREDPHI]], i32 0 +; CHECK-NEXT: store i64 [[TMP25]], ptr [[DST_2]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META18:![0-9]+]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE60]] +; CHECK: [[PRED_STORE_CONTINUE60]]: +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP22]], i32 1 +; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF61:.*]], label %[[PRED_STORE_CONTINUE62:.*]] +; CHECK: [[PRED_STORE_IF61]]: +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i64> [[PREDPHI]], i32 1 +; CHECK-NEXT: store i64 [[TMP27]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE62]] +; CHECK: [[PRED_STORE_CONTINUE62]]: +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP23]], i32 0 +; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF63:.*]], label %[[PRED_STORE_CONTINUE64:.*]] +; CHECK: [[PRED_STORE_IF63]]: +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 0 +; CHECK-NEXT: store i64 [[TMP29]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE64]] +; CHECK: [[PRED_STORE_CONTINUE64]]: +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP23]], i32 1 +; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF65:.*]], label %[[PRED_STORE_CONTINUE66:.*]] +; CHECK: [[PRED_STORE_IF65]]: +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 1 +; CHECK-NEXT: store i64 [[TMP31]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE66]] +; CHECK: [[PRED_STORE_CONTINUE66]]: +; CHECK-NEXT: [[TMP32:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], +; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], +; CHECK-NEXT: [[TMP34:%.*]] = select <2 x i1> [[TMP18]], <2 x i1> [[TMP32]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[TMP33]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = or <2 x i1> [[TMP22]], [[TMP34]] +; CHECK-NEXT: [[TMP37:%.*]] = or <2 x i1> [[TMP23]], [[TMP35]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0 +; CHECK-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF67:.*]], label %[[PRED_STORE_CONTINUE68:.*]] +; CHECK: [[PRED_STORE_IF67]]: +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]] +; CHECK-NEXT: store i64 [[TMP39]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE68]] +; CHECK: [[PRED_STORE_CONTINUE68]]: +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1 +; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF69:.*]], label %[[PRED_STORE_CONTINUE70:.*]] +; CHECK: [[PRED_STORE_IF69]]: +; CHECK-NEXT: [[TMP41:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] +; CHECK-NEXT: store i64 [[TMP41]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE70]] +; CHECK: [[PRED_STORE_CONTINUE70]]: +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0 +; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF71:.*]], label %[[PRED_STORE_CONTINUE72:.*]] +; CHECK: [[PRED_STORE_IF71]]: +; CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] +; CHECK-NEXT: store i64 [[TMP43]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE72]] +; CHECK: [[PRED_STORE_CONTINUE72]]: +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP37]], i32 1 +; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF73:.*]], label %[[PRED_STORE_CONTINUE74]] +; CHECK: [[PRED_STORE_IF73]]: +; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] +; CHECK-NEXT: store i64 [[TMP45]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE74]] +; CHECK: [[PRED_STORE_CONTINUE74]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 +; CHECK-NEXT: br i1 [[TMP46]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[C1:%.*]] = icmp eq i64 [[X]], 0 ; CHECK-NEXT: br i1 [[C1]], label %[[THEN_4:.*]], label %[[THEN_1:.*]] ; CHECK: [[THEN_1]]: @@ -188,7 +380,7 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK: [[LOOP_LATCH]]: ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 64 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -250,4 +442,20 @@ declare i64 @llvm.umin.i64(i64, i64) ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]} +; CHECK: [[META7]] = !{[[META8:![0-9]+]]} +; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]]} +; CHECK: [[META9]] = distinct !{[[META9]], !"LVerDomain"} +; CHECK: [[META10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], [[META13:![0-9]+]], [[META14:![0-9]+]]} +; CHECK: [[META11]] = distinct !{[[META11]], [[META9]]} +; CHECK: [[META12]] = distinct !{[[META12]], [[META9]]} +; CHECK: [[META13]] = distinct !{[[META13]], [[META9]]} +; CHECK: [[META14]] = distinct !{[[META14]], [[META9]]} +; CHECK: [[META15]] = !{[[META11]]} +; CHECK: [[META16]] = !{[[META12]], [[META13]], [[META14]]} +; CHECK: [[META17]] = !{[[META12]]} +; CHECK: [[META18]] = !{[[META13]], [[META14]]} +; CHECK: [[META19]] = !{[[META14]]} +; CHECK: [[META20]] = !{[[META13]]} +; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]} +; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 2e7205ee8b3c4a..dd3ebeb2cbf21b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -18,6 +18,7 @@ define i32 @multi_exit_iv_uniform(i32 %a, i64 %N, ptr %dst) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[TMP2]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i32> [[BROADCAST_SPLAT]] to <4 x i64> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -27,7 +28,6 @@ define i32 @multi_exit_iv_uniform(i32 %a, i64 %N, ptr %dst) { ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i32> [[BROADCAST_SPLAT]] to <4 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP5]], i32 4 ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP8]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll index 9e54f944e423a2..0e95d742092e65 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -19,6 +19,7 @@ define void @test_invar_gep(ptr %dst) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -32,7 +33,6 @@ define void @test_invar_gep(ptr %dst) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 0 ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 4 ; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 95588d1176dcd8..47c0949cee59eb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -21,6 +21,18 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; DEFAULT-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 +; DEFAULT-NEXT: [[TMP13:%.*]] = add i64 [[Y]], 1 +; DEFAULT-NEXT: [[TMP14:%.*]] = add i64 [[Y]], 1 +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement poison, i32 [[X]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector [[BROADCAST_SPLATINSERT6]], poison, zeroinitializer +; DEFAULT-NEXT: [[TMP25:%.*]] = lshr [[BROADCAST_SPLAT7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; DEFAULT-NEXT: [[TMP26:%.*]] = lshr [[BROADCAST_SPLAT7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; DEFAULT-NEXT: [[TMP31:%.*]] = shl [[BROADCAST_SPLAT7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; DEFAULT-NEXT: [[TMP32:%.*]] = shl [[BROADCAST_SPLAT7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement poison, i32 [[Z]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector [[BROADCAST_SPLATINSERT8]], poison, zeroinitializer +; DEFAULT-NEXT: [[TMP39:%.*]] = or [[BROADCAST_SPLAT9]], [[BROADCAST_SPLAT7]] +; DEFAULT-NEXT: [[TMP40:%.*]] = or [[BROADCAST_SPLAT9]], [[BROADCAST_SPLAT7]] ; DEFAULT-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() ; DEFAULT-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 4 ; DEFAULT-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], 1 @@ -29,10 +41,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; DEFAULT-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 4 ; DEFAULT-NEXT: [[TMP12:%.*]] = sub i32 [[TMP11]], 1 ; DEFAULT-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement poison, i32 0, i32 [[TMP12]] -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement poison, i32 [[X]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector [[BROADCAST_SPLATINSERT6]], poison, zeroinitializer -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement poison, i32 [[Z]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector [[BROADCAST_SPLATINSERT8]], poison, zeroinitializer ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -40,8 +48,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; DEFAULT-NEXT: [[VECTOR_RECUR2:%.*]] = phi [ [[VECTOR_RECUR_INIT1]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[VEC_PHI3:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP58:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP13:%.*]] = add i64 [[Y]], 1 -; DEFAULT-NEXT: [[TMP14:%.*]] = add i64 [[Y]], 1 ; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP13]] ; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP14]] ; DEFAULT-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 4 @@ -56,22 +62,16 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; DEFAULT-NEXT: [[TMP22:%.*]] = call @llvm.vector.splice.nxv4i32( [[TMP19]], [[TMP20]], i32 -1) ; DEFAULT-NEXT: [[TMP23:%.*]] = or [[TMP21]], [[BROADCAST_SPLAT7]] ; DEFAULT-NEXT: [[TMP24:%.*]] = or [[TMP22]], [[BROADCAST_SPLAT7]] -; DEFAULT-NEXT: [[TMP25:%.*]] = lshr [[BROADCAST_SPLAT7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; DEFAULT-NEXT: [[TMP26:%.*]] = lshr [[BROADCAST_SPLAT7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; DEFAULT-NEXT: [[TMP27:%.*]] = shl [[TMP23]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; DEFAULT-NEXT: [[TMP28:%.*]] = shl [[TMP24]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; DEFAULT-NEXT: [[TMP29:%.*]] = or [[TMP27]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) ; DEFAULT-NEXT: [[TMP30:%.*]] = or [[TMP28]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) -; DEFAULT-NEXT: [[TMP31:%.*]] = shl [[BROADCAST_SPLAT7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; DEFAULT-NEXT: [[TMP32:%.*]] = shl [[BROADCAST_SPLAT7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; DEFAULT-NEXT: [[TMP33:%.*]] = or [[TMP25]], [[TMP31]] ; DEFAULT-NEXT: [[TMP34:%.*]] = or [[TMP26]], [[TMP32]] ; DEFAULT-NEXT: [[TMP35:%.*]] = or [[TMP33]], [[TMP29]] ; DEFAULT-NEXT: [[TMP36:%.*]] = or [[TMP34]], [[TMP30]] ; DEFAULT-NEXT: [[TMP37:%.*]] = or [[TMP35]], [[BROADCAST_SPLAT7]] ; DEFAULT-NEXT: [[TMP38:%.*]] = or [[TMP36]], [[BROADCAST_SPLAT7]] -; DEFAULT-NEXT: [[TMP39:%.*]] = or [[BROADCAST_SPLAT9]], [[BROADCAST_SPLAT7]] -; DEFAULT-NEXT: [[TMP40:%.*]] = or [[BROADCAST_SPLAT9]], [[BROADCAST_SPLAT7]] ; DEFAULT-NEXT: [[TMP41:%.*]] = and [[TMP39]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; DEFAULT-NEXT: [[TMP42:%.*]] = and [[TMP40]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; DEFAULT-NEXT: [[TMP43:%.*]] = xor [[TMP41]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) @@ -167,6 +167,14 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], [[TMP9]] ; PRED-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 ; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP0]]) +; PRED-NEXT: [[TMP19:%.*]] = add i64 [[Y]], 1 +; PRED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[X]], i64 0 +; PRED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; PRED-NEXT: [[TMP25:%.*]] = lshr [[BROADCAST_SPLAT4]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; PRED-NEXT: [[TMP28:%.*]] = shl [[BROADCAST_SPLAT4]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; PRED-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i32 [[Z]], i64 0 +; PRED-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer +; PRED-NEXT: [[TMP32:%.*]] = or [[BROADCAST_SPLAT6]], [[BROADCAST_SPLAT4]] ; PRED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() ; PRED-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 4 ; PRED-NEXT: [[TMP15:%.*]] = sub i32 [[TMP14]], 1 @@ -175,10 +183,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP17:%.*]] = mul i32 [[TMP16]], 4 ; PRED-NEXT: [[TMP18:%.*]] = sub i32 [[TMP17]], 1 ; PRED-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement poison, i32 0, i32 [[TMP18]] -; PRED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[X]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; PRED-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i32 [[Z]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer ; PRED-NEXT: br label [[VECTOR_BODY:%.*]] ; PRED: vector.body: ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -186,7 +190,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ] ; PRED-NEXT: [[VECTOR_RECUR2:%.*]] = phi [ [[VECTOR_RECUR_INIT1]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] ; PRED-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[VECTOR_BODY]] ] -; PRED-NEXT: [[TMP19:%.*]] = add i64 [[Y]], 1 ; PRED-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP19]] ; PRED-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP21]], i64 0 @@ -194,14 +197,11 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP22]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR]], [[BROADCAST_SPLAT]], i32 -1) ; PRED-NEXT: [[TMP23:%.*]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR2]], [[TMP22]], i32 -1) ; PRED-NEXT: [[TMP24:%.*]] = or [[TMP23]], [[BROADCAST_SPLAT4]] -; PRED-NEXT: [[TMP25:%.*]] = lshr [[BROADCAST_SPLAT4]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; PRED-NEXT: [[TMP26:%.*]] = shl [[TMP24]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; PRED-NEXT: [[TMP27:%.*]] = or [[TMP26]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) -; PRED-NEXT: [[TMP28:%.*]] = shl [[BROADCAST_SPLAT4]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; PRED-NEXT: [[TMP29:%.*]] = or [[TMP25]], [[TMP28]] ; PRED-NEXT: [[TMP30:%.*]] = or [[TMP29]], [[TMP27]] ; PRED-NEXT: [[TMP31:%.*]] = or [[TMP30]], [[BROADCAST_SPLAT4]] -; PRED-NEXT: [[TMP32:%.*]] = or [[BROADCAST_SPLAT6]], [[BROADCAST_SPLAT4]] ; PRED-NEXT: [[TMP33:%.*]] = and [[TMP32]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; PRED-NEXT: [[TMP34:%.*]] = xor [[TMP33]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; PRED-NEXT: [[TMP35:%.*]] = zext [[TMP34]] to diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index ab4e7e61822626..d62e5991da0ad0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -159,6 +159,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT: vector.ph: ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT3]], <16 x i16> poison, <16 x i32> zeroinitializer +; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT4]] to <16 x i8> ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -169,7 +170,6 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP5:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT2]] to <16 x i8> ; DEFAULT-NEXT: [[TMP6:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT2]] to <16 x i8> -; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT4]] to <16 x i8> ; DEFAULT-NEXT: [[TMP8:%.*]] = and <16 x i8> [[TMP5]], [[TMP7]] ; DEFAULT-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP6]], [[TMP7]] ; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]] @@ -198,6 +198,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement poison, i16 [[X]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector [[BROADCAST_SPLATINSERT6]], poison, zeroinitializer +; DEFAULT-NEXT: [[TMP24:%.*]] = trunc [[BROADCAST_SPLAT7]] to ; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; DEFAULT: vec.epilog.vector.body: ; DEFAULT-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -206,7 +207,6 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP22]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; DEFAULT-NEXT: [[TMP23:%.*]] = trunc [[BROADCAST_SPLAT]] to -; DEFAULT-NEXT: [[TMP24:%.*]] = trunc [[BROADCAST_SPLAT7]] to ; DEFAULT-NEXT: [[TMP25:%.*]] = and [[TMP23]], [[TMP24]] ; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP21]] ; DEFAULT-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0 @@ -247,6 +247,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; PRED: vector.ph: ; PRED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT1]], <16 x i16> poison, <16 x i32> zeroinitializer +; PRED-NEXT: [[TMP3:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT2]] to <16 x i8> ; PRED-NEXT: br label [[VECTOR_BODY:%.*]] ; PRED: vector.body: ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -255,7 +256,6 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer ; PRED-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT]] to <16 x i8> -; PRED-NEXT: [[TMP3:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT2]] to <16 x i8> ; PRED-NEXT: [[TMP4:%.*]] = and <16 x i8> [[TMP2]], [[TMP3]] ; PRED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]] ; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll index 4768167a9c69f1..43f264cc1e0b85 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll @@ -170,12 +170,12 @@ define void @invariant_load_cond(ptr noalias nocapture %a, ptr nocapture readonl ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 168 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll index 62eb25e8b4a0cc..813bf34b071bf3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll @@ -3,9 +3,9 @@ define void @invariant_load(i64 %n, ptr noalias nocapture %a, ptr nocapture readonly %b) { ; CHECK-LABEL: @invariant_load -; CHECK: vector.body: ; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, ptr %b, i64 42 -; CHECK-NEXT: %[[INVLOAD:.*]] = load i32, ptr %[[GEP]] +; CHECK: vector.body: +; CHECK: %[[INVLOAD:.*]] = load i32, ptr %[[GEP]] ; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement poison, i32 %[[INVLOAD]], i64 0 ; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector %[[SPLATINS]], poison, zeroinitializer ; CHECK: %[[LOAD:.*]] = load , ptr diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll index 0d3f368d3551b6..c8114e3d28f4e2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll @@ -4,13 +4,13 @@ target triple = "aarch64-unknown-linux-gnu" define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 { ; CHECK-LABEL: @widen_extractvalue( -; CHECK: vector.body: ; CHECK: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[EXTRACT0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1 ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i64 [[EXTRACT1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer +; CHECK: vector.body: ; CHECK: [[ADD:%.*]] = add [[DOTSPLAT]], [[DOTSPLAT2]] entry: br label %loop.body diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index df02cb741700e5..aa55cd909c569d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -339,6 +339,9 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 9 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i32 -1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 9, i64 0), poison, zeroinitializer) @@ -346,13 +349,10 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 9, [[TMP5]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[A]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = xor [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i32 -1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0( [[TMP10]], [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index 7ca1b5395dd013..9a2fcf43c81575 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -439,6 +439,7 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -446,7 +447,6 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP9]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP11:%.*]] = udiv [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] @@ -484,6 +484,8 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED: vector.ph: ; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -495,8 +497,6 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 -; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer ; FIXED-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> ; FIXED-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> ; FIXED-NEXT: [[TMP10:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], [[TMP8]] @@ -570,6 +570,7 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -577,7 +578,6 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP9]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] @@ -615,6 +615,8 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED: vector.ph: ; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -626,8 +628,6 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 -; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer ; FIXED-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> ; FIXED-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> ; FIXED-NEXT: [[TMP10:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index a91f92348ab261..1990590252ea6c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -9,8 +9,10 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT2]] to <4 x i32> ; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ] @@ -18,11 +20,9 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 3) ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT2]] to <4 x i32> ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index a2c3f2bfa274c1..315064a474a2f6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -288,6 +288,8 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64 ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[N]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = trunc [[BROADCAST_SPLAT]] to +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[X]] to i64 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[T]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, ptr [[DST]], i64 0 @@ -295,9 +297,7 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = trunc [[BROADCAST_SPLAT]] to ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq [[TMP7]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[X]] to i64 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP9]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, ptr [[TMP10]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll index 07a1cca1bc21e7..b9a6235db6fa60 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll @@ -9,6 +9,7 @@ define void @test(ptr %p, i40 %a) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE32:%.*]] ] @@ -16,7 +17,6 @@ define void @test(ptr %p, i40 %a) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], ; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1> diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 8877e65a0d9d73..f85f5bd0e27afc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -782,44 +782,44 @@ define i64 @cost_assume(ptr %end, i64 %N) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[N:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3]] = add <2 x i64> [[VEC_PHI]], -; CHECK-NEXT: [[TMP4]] = add <2 x i64> [[VEC_PHI2]], -; CHECK-NEXT: [[TMP5]] = add <2 x i64> [[VEC_PHI3]], -; CHECK-NEXT: [[TMP6]] = add <2 x i64> [[VEC_PHI4]], -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7]] = add <2 x i64> [[VEC_PHI]], +; CHECK-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI2]], +; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI3]], +; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[VEC_PHI4]], +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]]) -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]]) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]]) -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]]) -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]]) -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP16]]) -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP17]]) -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP18]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[BIN_RDX5:%.*]] = add <2 x i64> [[TMP5]], [[BIN_RDX]] -; CHECK-NEXT: [[BIN_RDX6:%.*]] = add <2 x i64> [[TMP6]], [[BIN_RDX5]] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[BIN_RDX5:%.*]] = add <2 x i64> [[TMP9]], [[BIN_RDX]] +; CHECK-NEXT: [[BIN_RDX6:%.*]] = add <2 x i64> [[TMP10]], [[BIN_RDX5]] ; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -866,29 +866,37 @@ define void @reduction_store(ptr noalias %src, ptr %dst, i1 %x) #2 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i1> poison, i1 [[X:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT]], <8 x i1> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[X:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = zext <4 x i1> [[BROADCAST_SPLAT]] to <4 x i64> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i1> [[BROADCAST_SPLAT]] to <8 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = lshr <8 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i64> [[TMP4]] to <8 x i32> -; CHECK-NEXT: [[TMP6]] = and <8 x i32> [[VEC_PHI]], [[TMP5]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP11]] = and <4 x i32> [[VEC_PHI]], [[TMP9]] +; CHECK-NEXT: [[TMP12]] = and <4 x i32> [[VEC_PHI1]], [[TMP10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 24 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 24 +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP6]]) -; CHECK-NEXT: store i32 [[TMP8]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[BIN_RDX]]) +; CHECK-NEXT: store i32 [[TMP14]], ptr [[DST:%.*]], align 4 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], [[LOOP]] ] @@ -996,6 +1004,65 @@ exit: ret i64 %res } +; Test case for https://github.com/llvm/llvm-project/issues/107501. +define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) { +; CHECK-LABEL: @cost_loop_invariant_recipes( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[X:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[Y:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[TMP0]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[BROADCAST_SPLAT2]], [[TMP1]] +; CHECK-NEXT: [[TMP3]] = mul <2 x i64> [[TMP2]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[TMP3]]) +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT_I_I_I:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RED_MUL:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[NOT_X:%.*]] = xor i1 [[X]], true +; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[NOT_X]] to i64 +; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[Y]], [[EXT]] +; CHECK-NEXT: [[RED_MUL]] = mul i64 [[SHL]], [[RED]] +; CHECK-NEXT: [[IV_NEXT_I_I_I]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[RED_MUL_LCSSA:%.*]] = phi i64 [ [[RED_MUL]], [[LOOP]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[RED_MUL_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next.i.i.i, %loop ] + %red = phi i64 [ 1, %entry ], [ %red.mul, %loop ] + %not.x = xor i1 %x, true + %ext = zext i1 %not.x to i64 + %shl = shl i64 %y, %ext + %red.mul = mul i64 %shl, %red + %iv.next.i.i.i = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 1 + br i1 %ec, label %exit, label %loop + +exit: + ret i64 %red.mul +} + declare void @llvm.assume(i1 noundef) #0 attributes #0 = { "target-cpu"="penryn" } diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll index 32964d650ea0f7..7566844ff39df8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll @@ -19,6 +19,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; CHECK-NEXT: [[TMP17:%.*]] = sdiv i64 [[M]], [[CONV6]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -29,7 +30,6 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP17:%.*]] = sdiv i64 [[M]], [[CONV6]] ; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP17]] to i32 ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP17]], [[CONV61]] ; CHECK-NEXT: [[TMP21:%.*]] = sub i64 [[TMP5]], [[TMP20]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll index 9f9db3ad859911..c9a47a60866f17 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll @@ -26,6 +26,7 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775792 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x ptr> poison, ptr [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x ptr> [[BROADCAST_SPLATINSERT]], <16 x ptr> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x ptr> [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT4]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -37,7 +38,6 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x ptr> [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP3]], <16 x i32> poison), !alias.scope [[META3]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP3]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[PREDPHI]], i64 15 @@ -52,6 +52,7 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: [[N_VEC7:%.*]] = and i64 [[SMAX2]], 9223372036854775800 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT10]], <8 x ptr> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <8 x ptr> [[BROADCAST_SPLAT11]], zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT12]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -63,7 +64,6 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC7]] ; CHECK-NEXT: br i1 [[TMP6]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <8 x ptr> [[BROADCAST_SPLAT11]], zeroinitializer ; CHECK-NEXT: [[WIDE_MASKED_GATHER14:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[BROADCAST_SPLAT11]], i32 4, <8 x i1> [[TMP7]], <8 x i32> poison), !alias.scope [[META11]] ; CHECK-NEXT: [[PREDPHI15:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[WIDE_MASKED_GATHER14]], <8 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[PREDPHI15]], i64 7 diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll index aaaea2f39c2c85..7d278988c53f85 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -114,13 +114,13 @@ define i32 @uniform_address(ptr align(4) %addr, i32 %byte_offset) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = udiv i32 [[BYTE_OFFSET:%.*]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[BYTE_OFFSET]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = udiv i32 [[BYTE_OFFSET]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = udiv i32 [[BYTE_OFFSET]], 4 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ADDR:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ADDR]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[ADDR]], i32 [[TMP2]] diff --git a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll index 01e223a3243796..8b06d9f61e5da1 100644 --- a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll +++ b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll @@ -115,10 +115,10 @@ define i64 @invar_cond(i1 %c) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 45545feffd3257..8e56614a2e3d5c 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -118,6 +118,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Live-in ir<20001> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -126,7 +127,6 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]> -; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur>, ir<%recur.next> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: @@ -198,6 +198,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Live-in ir<20001> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -207,7 +208,6 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> ; CHECK-NEXT: EMIT vp<[[WIDEN_CAN:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDEN_CAN]]>, vp<[[BTC]]> -; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur>, ir<%recur.next> ; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: WIDEN ir<%add> = add ir<%rem>, ir<%recur.next> @@ -382,6 +382,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -390,7 +391,6 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]> -; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur>, ir<%recur.next> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 10b5aa64c180a0..b54d3cd7dd1850 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -63,13 +63,13 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-IC-NEXT: [[TMP20]] = load i32, ptr [[ARRAYIDX32]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP21]] = load i32, ptr [[ARRAYIDX32]], align 4 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; UNROLL-NO-IC-NEXT: [[ADD35:%.*]] = add i32 [[TMP20]], [[SCALAR_RECUR]] +; UNROLL-NO-IC-NEXT: [[ADD35:%.*]] = add i32 [[TMP21]], [[TMP20]] ; UNROLL-NO-IC-NEXT: store i32 [[ADD35]], ptr [[ARRAYIDX34]], align 4 ; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] @@ -119,13 +119,13 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP16:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-VF-NEXT: [[TMP16]] = load i32, ptr [[ARRAYIDX32]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP17]] = load i32, ptr [[ARRAYIDX32]], align 4 ; UNROLL-NO-VF-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; UNROLL-NO-VF-NEXT: [[ADD35:%.*]] = add i32 [[TMP16]], [[SCALAR_RECUR]] +; UNROLL-NO-VF-NEXT: [[ADD35:%.*]] = add i32 [[TMP17]], [[TMP16]] ; UNROLL-NO-VF-NEXT: store i32 [[ADD35]], ptr [[ARRAYIDX34]], align 4 ; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] @@ -173,13 +173,13 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[SCALAR_BODY]] ] +; SINK-AFTER-NEXT: [[TMP12:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; SINK-AFTER-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]] -; SINK-AFTER-NEXT: [[TMP12]] = load i32, ptr [[ARRAYIDX32]], align 4 +; SINK-AFTER-NEXT: [[TMP13]] = load i32, ptr [[ARRAYIDX32]], align 4 ; SINK-AFTER-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; SINK-AFTER-NEXT: [[ADD35:%.*]] = add i32 [[TMP12]], [[SCALAR_RECUR]] +; SINK-AFTER-NEXT: [[ADD35:%.*]] = add i32 [[TMP13]], [[TMP12]] ; SINK-AFTER-NEXT: store i32 [[ADD35]], ptr [[ARRAYIDX34]], align 4 ; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] @@ -282,12 +282,12 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[MINMAX_0_LCSSA]] ; UNROLL-NO-IC: scalar.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; UNROLL-NO-IC-NEXT: [[TMP21]] = load i32, ptr [[ARRAYIDX]], align 4 -; UNROLL-NO-IC-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP21]], [[SCALAR_RECUR]] +; UNROLL-NO-IC-NEXT: [[TMP22]] = load i32, ptr [[ARRAYIDX]], align 4 +; UNROLL-NO-IC-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP22]], [[TMP21]] ; UNROLL-NO-IC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0 ; UNROLL-NO-IC-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]] @@ -351,12 +351,12 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; UNROLL-NO-VF-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] ; UNROLL-NO-VF-NEXT: ret i32 [[MINMAX_0_LCSSA]] ; UNROLL-NO-VF: scalar.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; UNROLL-NO-VF-NEXT: [[TMP18]] = load i32, ptr [[ARRAYIDX]], align 4 -; UNROLL-NO-VF-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP18]], [[SCALAR_RECUR]] +; UNROLL-NO-VF-NEXT: [[TMP19]] = load i32, ptr [[ARRAYIDX]], align 4 +; UNROLL-NO-VF-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP19]], [[TMP18]] ; UNROLL-NO-VF-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0 ; UNROLL-NO-VF-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0 ; UNROLL-NO-VF-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]] @@ -415,12 +415,12 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; SINK-AFTER-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] ; SINK-AFTER-NEXT: ret i32 [[MINMAX_0_LCSSA]] ; SINK-AFTER: scalar.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[SCALAR_BODY]] ] +; SINK-AFTER-NEXT: [[TMP12:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; SINK-AFTER-NEXT: [[TMP12]] = load i32, ptr [[ARRAYIDX]], align 4 -; SINK-AFTER-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP12]], [[SCALAR_RECUR]] +; SINK-AFTER-NEXT: [[TMP13]] = load i32, ptr [[ARRAYIDX]], align 4 +; SINK-AFTER-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP13]], [[TMP12]] ; SINK-AFTER-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0 ; SINK-AFTER-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0 ; SINK-AFTER-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]] @@ -538,12 +538,12 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP24:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[ADVARS_IV]] -; UNROLL-NO-IC-NEXT: [[TMP24]] = load i16, ptr [[ARRAYIDX5]], align 2 -; UNROLL-NO-IC-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP24]] to double -; UNROLL-NO-IC-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double +; UNROLL-NO-IC-NEXT: [[TMP25]] = load i16, ptr [[ARRAYIDX5]], align 2 +; UNROLL-NO-IC-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP25]] to double +; UNROLL-NO-IC-NEXT: [[CONV11:%.*]] = sitofp i16 [[TMP24]] to double ; UNROLL-NO-IC-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]] ; UNROLL-NO-IC-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[ADVARS_IV]] @@ -611,12 +611,12 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[ADVARS_IV]] -; UNROLL-NO-VF-NEXT: [[TMP20]] = load i16, ptr [[ARRAYIDX5]], align 2 -; UNROLL-NO-VF-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP20]] to double -; UNROLL-NO-VF-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double +; UNROLL-NO-VF-NEXT: [[TMP21]] = load i16, ptr [[ARRAYIDX5]], align 2 +; UNROLL-NO-VF-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP21]] to double +; UNROLL-NO-VF-NEXT: [[CONV11:%.*]] = sitofp i16 [[TMP20]] to double ; UNROLL-NO-VF-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]] ; UNROLL-NO-VF-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[ADVARS_IV]] @@ -682,12 +682,12 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ] +; SINK-AFTER-NEXT: [[TMP14:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[ADVARS_IV]] -; SINK-AFTER-NEXT: [[TMP14]] = load i16, ptr [[ARRAYIDX5]], align 2 -; SINK-AFTER-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP14]] to double -; SINK-AFTER-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double +; SINK-AFTER-NEXT: [[TMP15]] = load i16, ptr [[ARRAYIDX5]], align 2 +; SINK-AFTER-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP15]] to double +; SINK-AFTER-NEXT: [[CONV11:%.*]] = sitofp i16 [[TMP14]] to double ; SINK-AFTER-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]] ; SINK-AFTER-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]] ; SINK-AFTER-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[ADVARS_IV]] @@ -928,13 +928,13 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[E_1_LCSSA_LCSSA]] ; UNROLL-NO-IC: for.cond1: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; UNROLL-NO-IC-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; UNROLL-NO-IC-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; UNROLL-NO-IC-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1 ; UNROLL-NO-IC-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1 ; UNROLL-NO-IC-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]] ; UNROLL-NO-IC: for.cond.cleanup3: -; UNROLL-NO-IC-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49 ; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]] @@ -972,13 +972,13 @@ define i32 @PR27246() { ; UNROLL-NO-VF-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ] ; UNROLL-NO-VF-NEXT: ret i32 [[E_1_LCSSA_LCSSA]] ; UNROLL-NO-VF: for.cond1: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; UNROLL-NO-VF-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; UNROLL-NO-VF-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; UNROLL-NO-VF-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1 ; UNROLL-NO-VF-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1 ; UNROLL-NO-VF-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]] ; UNROLL-NO-VF: for.cond.cleanup3: -; UNROLL-NO-VF-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49 ; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]] @@ -1021,13 +1021,13 @@ define i32 @PR27246() { ; SINK-AFTER-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ] ; SINK-AFTER-NEXT: ret i32 [[E_1_LCSSA_LCSSA]] ; SINK-AFTER: for.cond1: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; SINK-AFTER-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; SINK-AFTER-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; SINK-AFTER-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1 ; SINK-AFTER-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1 ; SINK-AFTER-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]] ; SINK-AFTER: for.cond.cleanup3: -; SINK-AFTER-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49 ; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]] @@ -1120,19 +1120,19 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VAR0:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2 ; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_NEXT]] ; UNROLL-NO-IC-NEXT: [[VAR2]] = load i32, ptr [[VAR1]], align 4 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] ; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; UNROLL-NO-IC: for.end: -; UNROLL-NO-IC-NEXT: [[VAR0_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[SCALAR_BODY]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[VAR0_LCSSA:%.*]] = phi i32 [ [[VAR0]], [[SCALAR_BODY]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[VAR0_LCSSA]] ; ; UNROLL-NO-VF-LABEL: @PR30183( @@ -1166,19 +1166,19 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: ; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VAR0:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2 ; UNROLL-NO-VF-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_NEXT]] ; UNROLL-NO-VF-NEXT: [[VAR2]] = load i32, ptr [[VAR1]], align 4 ; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] ; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; UNROLL-NO-VF: for.end: -; UNROLL-NO-VF-NEXT: [[VAR0_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[SCALAR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[VAR0_LCSSA:%.*]] = phi i32 [ [[VAR0]], [[SCALAR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: ret i32 [[VAR0_LCSSA]] ; ; SINK-AFTER-LABEL: @PR30183( @@ -1225,19 +1225,19 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: ; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ] +; SINK-AFTER-NEXT: [[VAR0:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2 ; SINK-AFTER-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_NEXT]] ; SINK-AFTER-NEXT: [[VAR2]] = load i32, ptr [[VAR1]], align 4 ; SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] ; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; SINK-AFTER: for.end: -; SINK-AFTER-NEXT: [[VAR0_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[SCALAR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[VAR0_LCSSA:%.*]] = phi i32 [ [[VAR0]], [[SCALAR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: ret i32 [[VAR0_LCSSA]] ; entry: @@ -1272,48 +1272,48 @@ define i64 @constant_folded_previous_value() { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VAR3]] = add i64 0, 1 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000 ; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; UNROLL-NO-IC: for.end: -; UNROLL-NO-IC-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[SCALAR_BODY]] ], [ 1, [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ 1, [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: ret i64 [[VAR2_LCSSA]] ; ; UNROLL-NO-VF-LABEL: @constant_folded_previous_value( ; UNROLL-NO-VF-NEXT: entry: ; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-VF: vector.ph: +; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 0, 1 +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 0, 1 ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 0, 1 -; UNROLL-NO-VF-NEXT: [[TMP1]] = add i64 0, 1 +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP1]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: ; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[VAR3]] = add i64 0, 1 ; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000 ; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; UNROLL-NO-VF: for.end: -; UNROLL-NO-VF-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[SCALAR_BODY]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: ret i64 [[VAR2_LCSSA]] ; ; SINK-AFTER-LABEL: @constant_folded_previous_value( @@ -1330,18 +1330,18 @@ define i64 @constant_folded_previous_value() { ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: ; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] +; SINK-AFTER-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[VAR3]] = add i64 0, 1 ; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000 ; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; SINK-AFTER: for.end: -; SINK-AFTER-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[SCALAR_BODY]] ], [ 1, [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ 1, [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: ret i64 [[VAR2_LCSSA]] ; entry: @@ -1389,19 +1389,19 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INC]] = add i32 [[INC_PHI]], 1 ; UNROLL-NO-IC-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64 ; UNROLL-NO-IC-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]] ; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95 ; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; UNROLL-NO-IC: for.end: -; UNROLL-NO-IC-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[VAL_PHI]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[VAL_PHI_LCSSA]] ; ; UNROLL-NO-VF-LABEL: @extract_second_last_iteration( @@ -1422,19 +1422,19 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: ; UNROLL-NO-VF-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INC]] = add i32 [[INC_PHI]], 1 ; UNROLL-NO-VF-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64 ; UNROLL-NO-VF-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]] ; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95 ; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; UNROLL-NO-VF: for.end: -; UNROLL-NO-VF-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[VAL_PHI]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: ret i32 [[VAL_PHI_LCSSA]] ; ; SINK-AFTER-LABEL: @extract_second_last_iteration( @@ -1458,19 +1458,19 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2 ; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: ; SINK-AFTER-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ] +; SINK-AFTER-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[INC]] = add i32 [[INC_PHI]], 1 ; SINK-AFTER-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64 ; SINK-AFTER-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]] ; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95 ; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; SINK-AFTER: for.end: -; SINK-AFTER-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[VAL_PHI]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: ret i32 [[VAL_PHI_LCSSA]] ; entry: @@ -1580,9 +1580,9 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP43]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.cond.cleanup: @@ -1592,10 +1592,10 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; UNROLL-NO-IC-NEXT: [[B_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP44:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[J_ADDR_09:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP44:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 [[IDXPROM]] ; UNROLL-NO-IC-NEXT: [[TMP44]] = load double, ptr [[ARRAYIDX]], align 8 -; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP44]] +; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = fmul double [[J_ADDR_09]], [[TMP44]] ; UNROLL-NO-IC-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00 ; UNROLL-NO-IC-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32 ; UNROLL-NO-IC-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]] @@ -1652,10 +1652,10 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; UNROLL-NO-VF-NEXT: [[B_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[J_ADDR_09:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 [[IDXPROM]] ; UNROLL-NO-VF-NEXT: [[TMP15]] = load double, ptr [[ARRAYIDX]], align 8 -; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP15]] +; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = fmul double [[J_ADDR_09]], [[TMP15]] ; UNROLL-NO-VF-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00 ; UNROLL-NO-VF-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32 ; UNROLL-NO-VF-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]] @@ -1709,9 +1709,9 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; SINK-AFTER-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP20]]) ; SINK-AFTER-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY]] ] ; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.cond.cleanup: @@ -1721,10 +1721,10 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; SINK-AFTER-NEXT: [[B_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ] +; SINK-AFTER-NEXT: [[J_ADDR_09:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 [[IDXPROM]] ; SINK-AFTER-NEXT: [[TMP23]] = load double, ptr [[ARRAYIDX]], align 8 -; SINK-AFTER-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP23]] +; SINK-AFTER-NEXT: [[MUL:%.*]] = fmul double [[J_ADDR_09]], [[TMP23]] ; SINK-AFTER-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00 ; SINK-AFTER-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32 ; SINK-AFTER-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]] @@ -1811,17 +1811,17 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 +; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-IC-NEXT: [[TMP21]] = load i16, ptr [[ARRAYIDX2]], align 2 -; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP21]] to i32 +; UNROLL-NO-IC-NEXT: [[TMP22]] = load i16, ptr [[ARRAYIDX2]], align 2 +; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP22]] to i32 ; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-IC-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4 @@ -1867,17 +1867,17 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 +; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[TMP17]] to i32 ; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-VF-NEXT: [[TMP17]] = load i16, ptr [[ARRAYIDX2]], align 2 -; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP17]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP18]] = load i16, ptr [[ARRAYIDX2]], align 2 +; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP18]] to i32 ; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-VF-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4 @@ -1919,17 +1919,17 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP11:%.*]], [[FOR_BODY]] ] +; SINK-AFTER-NEXT: [[TMP11:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 +; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 ; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; SINK-AFTER-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]] -; SINK-AFTER-NEXT: [[TMP11]] = load i16, ptr [[ARRAYIDX2]], align 2 -; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP11]] to i32 +; SINK-AFTER-NEXT: [[TMP12]] = load i16, ptr [[ARRAYIDX2]], align 2 +; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP12]] to i32 ; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; SINK-AFTER-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4 @@ -2049,18 +2049,18 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP49:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP50:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] ; UNROLL-NO-IC-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDVARS_IV]], i64 1 ; UNROLL-NO-IC-NEXT: store i32 7, ptr [[ARRAYCIDX]], align 4 -; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 -; UNROLL-NO-IC-NEXT: [[TMP49]] = load i16, ptr [[CUR_INDEX]], align 2 -; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP49]] to i32 +; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[TMP49]] to i32 +; UNROLL-NO-IC-NEXT: [[TMP50]] = load i16, ptr [[CUR_INDEX]], align 2 +; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP50]] to i32 ; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-IC-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4 @@ -2109,18 +2109,18 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] ; UNROLL-NO-VF-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDVARS_IV]], i64 1 ; UNROLL-NO-VF-NEXT: store i32 7, ptr [[ARRAYCIDX]], align 4 -; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP17]] = load i16, ptr [[CUR_INDEX]], align 2 -; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP17]] to i32 +; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[TMP17]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP18]] = load i16, ptr [[CUR_INDEX]], align 2 +; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP18]] to i32 ; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-VF-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4 @@ -2176,18 +2176,18 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[FOR_BODY]] ] +; SINK-AFTER-NEXT: [[TMP25:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP26:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] ; SINK-AFTER-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDVARS_IV]], i64 1 ; SINK-AFTER-NEXT: store i32 7, ptr [[ARRAYCIDX]], align 4 -; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 -; SINK-AFTER-NEXT: [[TMP25]] = load i16, ptr [[CUR_INDEX]], align 2 -; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP25]] to i32 +; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[TMP25]] to i32 +; SINK-AFTER-NEXT: [[TMP26]] = load i16, ptr [[CUR_INDEX]], align 2 +; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP26]] to i32 ; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; SINK-AFTER-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4 @@ -2277,18 +2277,18 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP24:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 +; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[TMP23]] to i32 ; UNROLL-NO-IC-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-IC-NEXT: [[TMP23]] = load i16, ptr [[ARRAYIDX2]], align 2 -; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP23]] to i32 +; UNROLL-NO-IC-NEXT: [[TMP24]] = load i16, ptr [[ARRAYIDX2]], align 2 +; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP24]] to i32 ; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-IC-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4 @@ -2336,18 +2336,18 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 +; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 ; UNROLL-NO-VF-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2 ; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-VF-NEXT: [[TMP19]] = load i16, ptr [[ARRAYIDX2]], align 2 -; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP19]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP20]] = load i16, ptr [[ARRAYIDX2]], align 2 +; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 ; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-VF-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4 @@ -2390,18 +2390,18 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] +; SINK-AFTER-NEXT: [[TMP12:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 +; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[TMP12]] to i32 ; SINK-AFTER-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2 ; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; SINK-AFTER-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]] -; SINK-AFTER-NEXT: [[TMP12]] = load i16, ptr [[ARRAYIDX2]], align 2 -; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP12]] to i32 +; SINK-AFTER-NEXT: [[TMP13]] = load i16, ptr [[ARRAYIDX2]], align 2 +; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP13]] to i32 ; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]] ; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; SINK-AFTER-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4 @@ -2577,16 +2577,16 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT3]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT3]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_COND:%.*]] ; UNROLL-NO-IC: for.cond: ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR5:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ] -; UNROLL-NO-IC-NEXT: [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10 -; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR5]], 15 +; UNROLL-NO-IC-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] +; UNROLL-NO-IC-NEXT: [[REC_2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ] +; UNROLL-NO-IC-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10 +; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[REC_2]], 15 ; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; UNROLL-NO-IC-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 ; UNROLL-NO-IC-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 @@ -2633,10 +2633,10 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-VF-NEXT: br label [[FOR_COND:%.*]] ; UNROLL-NO-VF: for.cond: ; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR3:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT2]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ] -; UNROLL-NO-VF-NEXT: [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10 -; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR3]], 15 +; UNROLL-NO-VF-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] +; UNROLL-NO-VF-NEXT: [[REC_2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT2]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ] +; UNROLL-NO-VF-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10 +; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[REC_2]], 15 ; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; UNROLL-NO-VF-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 ; UNROLL-NO-VF-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 @@ -2682,10 +2682,10 @@ define void @sink_dead_inst(ptr %a) { ; SINK-AFTER-NEXT: br label [[FOR_COND:%.*]] ; SINK-AFTER: for.cond: ; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ] -; SINK-AFTER-NEXT: [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10 -; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR4]], 15 +; SINK-AFTER-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] +; SINK-AFTER-NEXT: [[REC_2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ] +; SINK-AFTER-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10 +; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[REC_2]], 15 ; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; SINK-AFTER-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 ; SINK-AFTER-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 @@ -2839,9 +2839,9 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC-NEXT: ret i32 [[VAR]] ; UNROLL-NO-IC: bb2: ; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]] +; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] ; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 @@ -2904,9 +2904,9 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: ret i32 [[VAR]] ; UNROLL-NO-VF: bb2: ; UNROLL-NO-VF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; UNROLL-NO-VF-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]] +; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] ; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 @@ -2992,9 +2992,9 @@ define i32 @sink_into_replication_region(i32 %y) { ; SINK-AFTER-NEXT: ret i32 [[VAR]] ; SINK-AFTER: bb2: ; SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]] +; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] ; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 @@ -3209,10 +3209,10 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC: bb2: ; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; UNROLL-NO-IC-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]] -; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]] +; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] ; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; UNROLL-NO-IC-NEXT: store i32 [[VAR3]], ptr [[G]], align 4 ; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 @@ -3282,9 +3282,9 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP17]], [[TMP16]] ; UNROLL-NO-VF-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-VF: bb1: @@ -3293,10 +3293,10 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-VF: bb2: ; UNROLL-NO-VF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; UNROLL-NO-VF-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; UNROLL-NO-VF-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]] -; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]] +; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] ; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; UNROLL-NO-VF-NEXT: store i32 [[VAR3]], ptr [[G]], align 4 ; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 @@ -3406,9 +3406,9 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3 ; SINK-AFTER-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP39]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; SINK-AFTER-NEXT: br label [[BB2:%.*]] ; SINK-AFTER: bb1: @@ -3417,10 +3417,10 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER: bb2: ; SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; SINK-AFTER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; SINK-AFTER-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]] -; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]] +; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] ; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; SINK-AFTER-NEXT: store i32 [[VAR3]], ptr [[G]], align 4 ; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 @@ -3492,8 +3492,8 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15 +; UNROLL-NO-IC-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 ; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true ; UNROLL-NO-IC-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true ; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 @@ -3505,7 +3505,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC-NEXT: store i32 0, ptr [[A_GEP]], align 4 ; UNROLL-NO-IC-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]] ; UNROLL-NO-IC: for.end: -; UNROLL-NO-IC-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[FOR_LCSSA]] ; ; UNROLL-NO-VF-LABEL: @sink_after_dead_inst( @@ -3535,13 +3535,13 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-VF: loop: ; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] -; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15 +; UNROLL-NO-VF-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] +; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 ; UNROLL-NO-VF-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true ; UNROLL-NO-VF-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true ; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 @@ -3553,7 +3553,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-VF-NEXT: store i32 0, ptr [[A_GEP]], align 4 ; UNROLL-NO-VF-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]] ; UNROLL-NO-VF: for.end: -; UNROLL-NO-VF-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: ret i32 [[FOR_LCSSA]] ; ; SINK-AFTER-LABEL: @sink_after_dead_inst( @@ -3587,8 +3587,8 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; SINK-AFTER-NEXT: br label [[LOOP:%.*]] ; SINK-AFTER: loop: ; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] -; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15 +; SINK-AFTER-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] +; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 ; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true ; SINK-AFTER-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true ; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 @@ -3600,7 +3600,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; SINK-AFTER-NEXT: store i32 0, ptr [[A_GEP]], align 4 ; SINK-AFTER-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]] ; SINK-AFTER: for.end: -; SINK-AFTER-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: ret i32 [[FOR_LCSSA]] ; entry: @@ -3651,13 +3651,13 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 997, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 997, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_COND:%.*]] ; UNROLL-NO-IC: for.cond: ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] -; UNROLL-NO-IC-NEXT: [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10 +; UNROLL-NO-IC-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] +; UNROLL-NO-IC-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10 ; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; UNROLL-NO-IC-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 ; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000 @@ -3687,13 +3687,13 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1001, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1001, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_COND:%.*]] ; UNROLL-NO-VF: for.cond: ; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] -; UNROLL-NO-VF-NEXT: [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10 +; UNROLL-NO-VF-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] +; UNROLL-NO-VF-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10 ; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; UNROLL-NO-VF-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 ; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000 @@ -3720,13 +3720,13 @@ define void @unused_recurrence(ptr %a) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 ; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1001, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1001, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_COND:%.*]] ; SINK-AFTER: for.cond: ; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] -; SINK-AFTER-NEXT: [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10 +; SINK-AFTER-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] +; SINK-AFTER-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10 ; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; SINK-AFTER-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 ; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000 @@ -3768,12 +3768,12 @@ define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ] ; UNROLL-NO-IC-NEXT: [[ADD]] = add i64 [[PHI]], 1 ; UNROLL-NO-IC-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4 ; UNROLL-NO-IC-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1 @@ -3795,12 +3795,12 @@ define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-VF: loop: ; UNROLL-NO-VF-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ] +; UNROLL-NO-VF-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ] ; UNROLL-NO-VF-NEXT: [[ADD]] = add i64 [[PHI]], 1 ; UNROLL-NO-VF-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4 ; UNROLL-NO-VF-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1 @@ -3824,12 +3824,12 @@ define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) { ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[LOOP:%.*]] ; SINK-AFTER: loop: ; SINK-AFTER-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ] +; SINK-AFTER-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ] ; SINK-AFTER-NEXT: [[ADD]] = add i64 [[PHI]], 1 ; SINK-AFTER-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4 ; SINK-AFTER-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index b79525bc3e440d..bc7e3c6c6e2510 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -6,12 +6,13 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: bb: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i1> poison, i1 [[C_1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT1]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i1> poison, i1 [[C_2:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT3]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -22,7 +23,6 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], ; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[BROADCAST_SPLAT4]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> , <2 x i32> [[VEC_IND]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll index 306ec125dc2023..119571d1c39cc1 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll @@ -301,10 +301,14 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], 15 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], -16 ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[COND:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT7]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLAT8]], +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLAT8]], +; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLAT8]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLAT8]], +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE38:%.*]] ] @@ -320,10 +324,6 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLAT8]], -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLAT8]], -; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLAT8]], -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLAT8]], ; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP0]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll index 18d2323ed6f5bc..af83c13bdfdc4f 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll @@ -17,10 +17,10 @@ define void @trunc_minimal_bitwidth(ptr %bptr, ptr noalias %hptr, i32 %val, i64 ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = trunc [[BROADCAST_SPLAT]] to ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = trunc [[BROADCAST_SPLAT]] to ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[HPTR:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store [[TMP4]], ptr [[TMP5]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll index da0f7283d80d5b..b3a24b472993be 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll @@ -220,10 +220,10 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF4IC1: vector.ph: ; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 ; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-VF4IC1-NOT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 +; CHECK-VF4IC1-NOT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[SPLAT_OF_A]], ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] -; CHECK-VF4IC1: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[SPLAT_OF_A]], ; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll index f55e37c7772609..d9827a8c71ee6c 100644 --- a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll @@ -41,6 +41,7 @@ define i16 @test(ptr %arg, i64 %N) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[L_2]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -49,7 +50,6 @@ define i16 @test(ptr %arg, i64 %N) { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[L_1]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP6]], align 2, !alias.scope [[META0:![0-9]+]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[L_2]], i64 0 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 1 ; CHECK-NEXT: store i16 [[TMP8]], ptr [[TMP7]], align 2, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll index 33fc1f70bf3c09..1694de018c53be 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll @@ -69,13 +69,13 @@ define i32 @test_icmp_and_op_zext(ptr %dst, i64 %a) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[AND]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 1, [[DOTCAST]] ; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i8> diff --git a/llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll b/llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll index 7de2d4ddfa485f..697eddfa076b8b 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll @@ -11,6 +11,7 @@ define void @pr77468(ptr noalias %src, ptr noalias %dst, i1 %x) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[BROADCAST_SPLAT]] to <4 x i16> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -19,7 +20,6 @@ define void @pr77468(ptr noalias %src, ptr noalias %dst, i1 %x) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[SRC]], i16 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[BROADCAST_SPLAT]] to <4 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[WIDE_LOAD]] to <4 x i16> ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i16> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[DST]], i16 [[TMP0]] diff --git a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll index edf719564e6ee8..8b24f9993de705 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll @@ -11,12 +11,12 @@ define void @test_pr47927_lshr_const_shift_ops(ptr %dst, i32 %f) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8> ; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]] @@ -71,12 +71,12 @@ define void @test_shl_const_shift_ops(ptr %dst, i32 %f) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8> ; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]] @@ -131,12 +131,12 @@ define void @test_ashr_const_shift_ops(ptr %dst, i32 %f) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8> ; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-geps.ll b/llvm/test/Transforms/LoopVectorize/vector-geps.ll index 7aff527a5a7990..c9c1cd1c1817b8 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-geps.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-geps.ll @@ -65,12 +65,12 @@ define void @uniform_vector_gep_stored(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804 -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 4 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x ptr> [[DOTSPLAT]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll index f53c7b2fc6bae6..c6c834b5382f75 100644 --- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll @@ -163,6 +163,10 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) { ; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[SCALE]], 1 ; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]] +; CHECK-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]] +; CHECK-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]] +; CHECK-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -174,11 +178,10 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) { ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]] ; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP11]], align 8 -; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP13]], align 8 -; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8 -; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8 +; CHECK-NEXT: store ptr [[TMP81]], ptr [[TMP13]], align 8 +; CHECK-NEXT: store ptr [[TMP82]], ptr [[TMP15]], align 8 +; CHECK-NEXT: store ptr [[TMP83]], ptr [[TMP17]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index d29f70818a0767..0f3cd9d4ca4d61 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -259,6 +259,7 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: Live-in ir<11> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: CLONE ir<%gep.A.uniform> = getelementptr inbounds ir<%A>, ir<0> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -268,7 +269,6 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<21> + vp<[[CAN_IV]]> * ir<1> ; CHECK-NEXT: EMIT vp<[[WIDE_CAN_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDE_CAN_IV]]>, vp<[[BTC]]> -; CHECK-NEXT: CLONE ir<%gep.A.uniform> = getelementptr inbounds ir<%A>, ir<0> ; CHECK-NEXT: CLONE ir<%lv> = load ir<%gep.A.uniform> ; CHECK-NEXT: WIDEN ir<%cmp> = icmp ult ir<%iv>, ir<%k> ; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not ir<%cmp> diff --git a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll index 31021d5244f325..978b18dec064de 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll @@ -7,14 +7,14 @@ define void @pr63340(ptr %A, ptr %B) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 1 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 1 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[B]], i8 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <4 x ptr> [[DOTSPLAT]], ptr [[TMP3]], align 8