diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index da3c780550a083..04d2ff594bf5fd 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -114,6 +114,7 @@ class VectorCombine {
   bool foldShuffleOfBinops(Instruction &I);
   bool foldShuffleOfCastops(Instruction &I);
   bool foldShuffleOfShuffles(Instruction &I);
+  bool foldShuffleToIdentity(Instruction &I);
   bool foldShuffleFromReductions(Instruction &I);
   bool foldTruncFromReductions(Instruction &I);
   bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
@@ -1667,6 +1668,175 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
   return true;
 }
 
+// Starting from a shuffle, look up through operands tracking the shuffled index
+// of each lane. If we can simplify away the shuffles to identities then
+// do so.
+//
+// The fold runs in two phases. The analysis phase walks a worklist of per-lane
+// (value, source lane) vectors and only accepts trees built from non-div/rem
+// binary operators and unary operators whose leaves are identities or splats.
+// The generation phase then rebuilds that tree at the full result width.
+// Returns true if \p I was replaced.
+bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
+  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
+  if (!Ty || !isa<Instruction>(I.getOperand(0)) ||
+      !isa<Instruction>(I.getOperand(1)))
+    return false;
+
+  // A value plus the lane read from it; a null value marks a poison lane.
+  using InstLane = std::pair<Value *, int>;
+
+  // Follow lane Lane of V up through any chain of shuffles to the value and
+  // lane that actually provide the element.
+  auto LookThroughShuffles = [](Value *V, int Lane) -> InstLane {
+    while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
+      unsigned NumElts =
+          cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
+      int M = SV->getMaskValue(Lane);
+      if (M < 0)
+        return {nullptr, PoisonMaskElem};
+      if (M < (int)NumElts) {
+        V = SV->getOperand(0);
+        Lane = M;
+      } else {
+        V = SV->getOperand(1);
+        Lane = M - NumElts;
+      }
+    }
+    return InstLane{V, Lane};
+  };
+
+  // Build the lane vector for operand Op of the instructions in Item.
+  auto GenerateInstLaneVectorFromOperand =
+      [&LookThroughShuffles](ArrayRef<InstLane> Item, int Op) {
+        SmallVector<InstLane> NItem;
+        for (InstLane V : Item) {
+          NItem.emplace_back(
+              !V.first
+                  ? InstLane{nullptr, PoisonMaskElem}
+                  : LookThroughShuffles(
+                        cast<Instruction>(V.first)->getOperand(Op), V.second));
+        }
+        return NItem;
+      };
+
+  // True if the first value already has the result type and every non-poison
+  // lane N reads lane N of it. Shared by both phases so they always agree.
+  auto IsIdentity = [Ty](ArrayRef<InstLane> Item) {
+    return Item[0].second == 0 && Item[0].first->getType() == Ty &&
+           all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
+             return !E.value().first ||
+                    (E.value().first == Item[0].first &&
+                     E.value().second == (int)E.index());
+           });
+  };
+
+  // True if every non-poison lane reads the same lane of the first value.
+  // Shared by both phases so they always agree.
+  auto IsSplat = [](ArrayRef<InstLane> Item) {
+    return all_of(drop_begin(Item), [&](const InstLane &IL) {
+      return !IL.first ||
+             (IL.first == Item[0].first && IL.second == Item[0].second);
+    });
+  };
+
+  SmallVector<InstLane> Start(Ty->getNumElements());
+  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
+    Start[M] = LookThroughShuffles(&I, M);
+
+  SmallVector<SmallVector<InstLane>> Worklist;
+  Worklist.push_back(Start);
+  SmallPtrSet<Value *, 4> IdentityLeafs, SplatLeafs;
+  unsigned NumVisited = 0;
+
+  while (!Worklist.empty()) {
+    SmallVector<InstLane> Item = Worklist.pop_back_val();
+    if (++NumVisited > MaxInstrsToScan)
+      return false;
+
+    // If we found an undef first lane then bail out to keep things simple.
+    if (!Item[0].first)
+      return false;
+
+    // Look for an identity value.
+    if (IsIdentity(Item)) {
+      IdentityLeafs.insert(Item[0].first);
+      continue;
+    }
+    // Look for a splat value.
+    if (IsSplat(Item)) {
+      SplatLeafs.insert(Item[0].first);
+      continue;
+    }
+
+    // We need each element to be the same type of value, and check that each
+    // element has a single use.
+    if (!all_of(drop_begin(Item), [&](InstLane IL) {
+          if (!IL.first)
+            return true;
+          if (auto *Inst = dyn_cast<Instruction>(IL.first);
+              Inst && !Inst->hasOneUse())
+            return false;
+          if (IL.first->getValueID() != Item[0].first->getValueID())
+            return false;
+          // Matching value IDs do not prove the first lane is an intrinsic
+          // too, so check before comparing intrinsic IDs rather than cast<>.
+          auto *II = dyn_cast<IntrinsicInst>(IL.first);
+          auto *FrontII = dyn_cast<IntrinsicInst>(Item[0].first);
+          return !II ||
+                 (FrontII && II->getIntrinsicID() == FrontII->getIntrinsicID());
+        }))
+      return false;
+
+    // Check the operator is one that we support. We exclude div/rem in case
+    // they hit UB from poison lanes.
+    if (isa<BinaryOperator>(Item[0].first) &&
+        !cast<BinaryOperator>(Item[0].first)->isIntDivRem()) {
+      Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
+      Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
+    } else if (isa<UnaryOperator>(Item[0].first)) {
+      Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
+    } else {
+      return false;
+    }
+  }
+
+  // If we got this far, we know the shuffles are superfluous and can be
+  // removed. Scan through again and generate the new tree of instructions.
+  std::function<Value *(ArrayRef<InstLane>)> Generate =
+      [&](ArrayRef<InstLane> Item) -> Value * {
+    // Re-run the analysis predicates: a value may be recorded as a leaf for one
+    // item yet appear with different lanes in another.
+    if (IdentityLeafs.contains(Item[0].first) && IsIdentity(Item))
+      return Item[0].first;
+    if (SplatLeafs.contains(Item[0].first) && IsSplat(Item)) {
+      if (auto ILI = dyn_cast<Instruction>(Item[0].first))
+        Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef());
+      else if (isa<Argument>(Item[0].first))
+        Builder.SetInsertPointPastAllocas(I.getParent()->getParent());
+      SmallVector<int, 16> Mask(Ty->getNumElements(), Item[0].second);
+      return Builder.CreateShuffleVector(Item[0].first, Mask);
+    }
+
+    auto *Inst = cast<Instruction>(Item[0].first);
+    SmallVector<Value *> Ops(Inst->getNumOperands());
+    for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx < E; Idx++)
+      Ops[Idx] = Generate(GenerateInstLaneVectorFromOperand(Item, Idx));
+    Builder.SetInsertPoint(Inst);
+    if (auto BI = dyn_cast<BinaryOperator>(Inst))
+      return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
+                                 Ops[0], Ops[1]);
+    assert(isa<UnaryOperator>(Inst) &&
+           "Unexpected instruction type in Generate");
+    return Builder.CreateUnOp((Instruction::UnaryOps)Inst->getOpcode(),
+                              Ops[0]);
+  };
+
+  Value *V = Generate(Start);
+  replaceValue(I, *V);
+  return true;
+}
+
 /// Given a commutative reduction, the order of the input lanes does not alter
 /// the results. We can use this to remove certain shuffles feeding the
 /// reduction, removing the need to shuffle at all.
@@ -2224,6 +2370,7 @@ bool VectorCombine::run() { MadeChange |= foldShuffleOfCastops(I); MadeChange |= foldShuffleOfShuffles(I); MadeChange |= foldSelectShuffle(I); + MadeChange |= foldShuffleToIdentity(I); break; case Instruction::BitCast: MadeChange |= foldBitcastShuffle(I); diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll index f1d7c0e0c41233..c085e10c049a93 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll @@ -22,19 +22,13 @@ define void @add4(ptr noalias noundef %x, ptr noalias noundef %y, i32 noundef %n ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i16>, ptr [[TMP0]], align 2 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_VEC24:%.*]] = load <32 x i16>, ptr [[TMP1]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP3:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP4:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP6:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> [[TMP3]], <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> [[TMP6]], <16 x i32> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> [[TMP8]], <32 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]] +; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP2]] ; CHECK-NEXT: store <32 x i16> [[INTERLEAVED_VEC]], ptr 
[[GEP]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP9]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -412,22 +406,13 @@ define void @addmul(ptr noalias noundef %x, ptr noundef %y, ptr noundef %z, i32 ; CHECK-NEXT: [[TMP2:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_VEC36:%.*]] = load <32 x i16>, ptr [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = add <32 x i16> [[TMP2]], [[WIDE_VEC36]] -; CHECK-NEXT: [[TMP5:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP6:%.*]] = add <32 x i16> [[TMP5]], [[WIDE_VEC36]] -; CHECK-NEXT: [[TMP7:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP8:%.*]] = add <32 x i16> [[TMP7]], [[WIDE_VEC36]] -; CHECK-NEXT: [[TMP9:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP10:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP11:%.*]] = add <32 x i16> [[TMP10]], [[WIDE_VEC36]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> [[TMP6]], <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <32 x i16> [[TMP8]], <32 x i16> [[TMP11]], <16 x i32> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i16> [[TMP12]], <16 x i16> [[TMP13]], <32 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[TMP2]], [[WIDE_VEC36]] +; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP4]] ; CHECK-NEXT: store 
<32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP14]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index 6a81964b917edf..b96732e24ce4e7 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -15,13 +15,7 @@ define <8 x i8> @trivial(<8 x i8> %a) { define <8 x i8> @add(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: @add( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]] -; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret <8 x i8> [[R]] ; %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> @@ -80,13 +74,7 @@ define <8 x i8> @wrong_lanes(<8 x i8> %a, <8 x i8> %b) { define <8 x half> @fadd(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @fadd( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> -; 
CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x half> [[B]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BT]] -; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BB]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> @@ -101,11 +89,7 @@ define <8 x half> @fadd(<8 x half> %a, <8 x half> %b) { define <8 x half> @fneg(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @fneg( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[ABT:%.*]] = fneg <4 x half> [[AT]] -; CHECK-NEXT: [[ABB:%.*]] = fneg <4 x half> [[AB]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fneg <8 x half> [[A:%.*]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> @@ -135,12 +119,8 @@ define <8 x i8> @abs(<8 x i8> %a) { define <8 x half> @splat0(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @splat0( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]] -; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; 
CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> @@ -154,12 +134,8 @@ define <8 x half> @splat0(<8 x half> %a, <8 x half> %b) { define <8 x half> @splat2(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @splat2( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]] -; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> @@ -173,12 +149,8 @@ define <8 x half> @splat2(<8 x half> %a, <8 x half> %b) { define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @splatandidentity( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]] -; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A]], [[TMP1]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %ab = shufflevector <8 x half> %a, <8 x half> 
poison, <4 x i32> @@ -192,11 +164,9 @@ define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) { define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @splattwice( -; CHECK-NEXT: [[AS:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[AB1:%.*]] = fadd <4 x half> [[AS]], [[BS]] -; CHECK-NEXT: [[AB2:%.*]] = fadd <4 x half> [[AS]], [[BS]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[AB1]], <4 x half> [[AB2]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP2]], [[TMP1]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %as = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> zeroinitializer @@ -209,13 +179,7 @@ define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) { define <8 x i8> @undeflane(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: @undeflane( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]] -; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret <8 x i8> [[R]] ; %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> @@ -281,18 +245,9 @@ define <8 x i8> @constantdiff2(<8 x i8> %a) { define <8 x i8> @inner_shuffle(<8 x 
i8> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: @inner_shuffle( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[CS:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[ABT:%.*]] = mul <4 x i8> [[AT]], [[BT]] -; CHECK-NEXT: [[ABB:%.*]] = mul <4 x i8> [[AB]], [[BB]] -; CHECK-NEXT: [[ABT2:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[ABB2:%.*]] = shufflevector <4 x i8> [[ABB]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[ABT3:%.*]] = add <4 x i8> [[ABT2]], [[CS]] -; CHECK-NEXT: [[ABB3:%.*]] = add <4 x i8> [[ABB2]], [[CS]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT3]], <4 x i8> [[ABB3]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i8> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[TMP2]], [[TMP1]] ; CHECK-NEXT: ret <8 x i8> [[R]] ; %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> @@ -335,14 +290,9 @@ define <8 x i8> @extrause_add(<8 x i8> %a, <8 x i8> %b) { define <8 x i8> @extrause_shuffle(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: @extrause_shuffle( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> ; CHECK-NEXT: call void @use(<4 x i8> 
[[BT]]) -; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]] -; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[A:%.*]], [[B]] ; CHECK-NEXT: ret <8 x i8> [[R]] ; %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> @@ -457,7 +407,7 @@ define <8 x i8> @intrinsics_minmax(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> ; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AT]], <4 x i8> [[BT]]) ; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AB]], <4 x i8> [[BB]]) -; CHECK-NEXT: [[ABT1:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[ABT]], <4 x i8> [[BT]]) +; CHECK-NEXT: [[ABT1:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[ABT]], <4 x i8> [[BT]]) ; CHECK-NEXT: [[ABB1:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[ABB]], <4 x i8> [[BB]]) ; CHECK-NEXT: [[ABT2:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[ABT1]], <4 x i8> [[BT]]) ; CHECK-NEXT: [[ABB2:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[ABB1]], <4 x i8> [[BB]]) @@ -472,7 +422,7 @@ define <8 x i8> @intrinsics_minmax(<8 x i8> %a, <8 x i8> %b) { %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> %abt = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %at, <4 x i8> %bt) %abb = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %ab, <4 x i8> %bb) - %abt1 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %abt, <4 x i8> %bt) + %abt1 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %abt, <4 x i8> %bt) %abb1 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %abb, <4 x i8> %bb) %abt2 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %abt1, <4 x i8> %bt) %abb2 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %abb1, <4 x i8> %bb) @@ -536,57 +486,42 @@ define <8 x i8> @intrinsics_different(<8 x i8> %a, <8 x i8> %b) { ret <8 x i8> %r } +; div and rem are currently excluded. 
+define <8 x i8> @div(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: @div( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[ABT:%.*]] = udiv <4 x i8> [[AT]], [[BT]] +; CHECK-NEXT: [[ABB:%.*]] = udiv <4 x i8> [[AB]], [[BB]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> + %abt = udiv <4 x i8> %at, %bt + %abb = udiv <4 x i8> %ab, %bb + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> + ret <8 x i8> %r +} + define void @v8f64interleave(i64 %0, ptr %1, ptr %x, double %z) { ; CHECK-LABEL: @v8f64interleave( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Z:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP1:%.*]], align 8 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC27:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC28:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: 
[[STRIDED_VEC29:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC30:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC31:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC32:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC33:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]] -; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[STRIDED_VEC35:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC36:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC37:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC38:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC39:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC40:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC41:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC42:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[STRIDED_VEC35]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[STRIDED_VEC27]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[STRIDED_VEC36]], [[TMP5]] -; CHECK-NEXT: 
[[TMP7:%.*]] = fmul fast <2 x double> [[STRIDED_VEC28]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <2 x double> [[STRIDED_VEC37]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <2 x double> [[STRIDED_VEC29]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[STRIDED_VEC38]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[STRIDED_VEC30]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[STRIDED_VEC39]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x double> [[STRIDED_VEC31]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <2 x double> [[STRIDED_VEC40]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[STRIDED_VEC32]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <2 x double> [[STRIDED_VEC41]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = or disjoint i64 [[TMP0]], 7 -; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <2 x double> [[STRIDED_VEC33]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = fadd fast <2 x double> [[STRIDED_VEC42]], [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 -56 -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP10]], <4 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> [[TMP14]], <4 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <2 x double> [[TMP16]], <2 x double> [[TMP20]], <4 x i32> -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x double> [[TMP22]], <4 x double> [[TMP23]], <8 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x double> [[TMP24]], <4 x double> [[TMP25]], <8 x i32> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> [[TMP27]], <16 x i32> -; 
CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP21]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <16 x double> [[WIDE_VEC]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]] +; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd <16 x double> [[WIDE_VEC34]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[TMP0]], 7 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -56 +; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll index 685d661ea6bcd2..60a6c4b1d9b93f 100644 --- a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll @@ -12,12 +12,11 @@ define <4 x double> @PR60649() { ; CHECK: unreachable: ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[T0:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[UNREACHABLE:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[UNREACHABLE:%.*]] ] ; CHECK-NEXT: [[T1:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY]] ], [ zeroinitializer, [[UNREACHABLE]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x double> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[TMP0]], +; CHECK-NEXT: 
[[TMP3:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[T5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[T5]] ;