diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 318a9d773dc2e1..de220d68a59163 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9136,11 +9136,12 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(
   }
 
   // If this loop must exit based on this condition (or execute undefined
-  // behaviour), and we can prove the test sequence produced must repeat
-  // the same values on self-wrap of the IV, then we can infer that IV
-  // doesn't self wrap because if it did, we'd have an infinite (undefined)
-  // loop.  Note that a stride of 0 is trivially no-self-wrap by definition.
+  // behaviour), see if we can improve wrap flags.  This is essentially
+  // a must execute style proof.
   if (ControllingFiniteLoop && isLoopInvariant(RHS, L)) {
+    // If we can prove the test sequence produced must repeat the same values
+    // on self-wrap of the IV, then we can infer that IV doesn't self wrap
+    // because if it did, we'd have an infinite (undefined) loop.
     // TODO: We can peel off any functions which are invertible *in L*.  Loop
     // invariant terms are effectively constants for our purposes here.
     auto *InnerLHS = LHS;
@@ -9156,6 +9157,25 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(
       Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
       setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
     }
+
+    // For a slt/ult condition with a positive step, can we prove nsw/nuw?
+    // From no-self-wrap, this follows trivially from the fact that every
+    // (un)signed-wrapped, but not self-wrapped value must be LT than the
+    // last value before (un)signed wrap.  Since we know that last value
+    // didn't exit, nor will any smaller one.
+    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT) {
+      auto WrapType = Pred == ICmpInst::ICMP_SLT ? SCEV::FlagNSW : SCEV::FlagNUW;
+      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS);
+          AR && AR->getLoop() == L && AR->isAffine() &&
+          !AR->getNoWrapFlags(WrapType) && AR->hasNoSelfWrap() &&
+          isKnownPositive(AR->getStepRecurrence(*this))) {
+        auto Flags = AR->getNoWrapFlags();
+        Flags = setFlags(Flags, WrapType);
+        SmallVector<const SCEV *> Operands{AR->operands()};
+        Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
+        setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
+      }
+    }
   }
 
   switch (Pred) {
@@ -12769,35 +12789,6 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
 
   const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
   bool PredicatedIV = false;
-
-  auto canAssumeNoSelfWrap = [&](const SCEVAddRecExpr *AR) {
-    // Can we prove this loop *must* be UB if overflow of IV occurs?
-    // Reasoning goes as follows:
-    // * Suppose the IV did self wrap.
-    // * If Stride evenly divides the iteration space, then once wrap
-    //   occurs, the loop must revisit the same values.
-    // * We know that RHS is invariant, and that none of those values
-    //   caused this exit to be taken previously.  Thus, this exit is
-    //   dynamically dead.
-    // * If this is the sole exit, then a dead exit implies the loop
-    //   must be infinite if there are no abnormal exits.
-    // * If the loop were infinite, then it must either not be mustprogress
-    //   or have side effects. Otherwise, it must be UB.
-    // * It can't (by assumption), be UB so we have contradicted our
-    //   premise and can conclude the IV did not in fact self-wrap.
-    if (!isLoopInvariant(RHS, L))
-      return false;
-
-    if (!isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true,
-                                /*OrNegative*/ true))
-      return false;
-
-    if (!ControlsOnlyExit || !loopHasNoAbnormalExits(L))
-      return false;
-
-    return loopIsFiniteByAssumption(L);
-  };
-
   if (!IV) {
     if (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS)) {
       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ZExt->getOperand());
@@ -12948,21 +12939,10 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
         Stride = getUMaxExpr(Stride, getOne(Stride->getType()));
       }
     }
-  } else if (!Stride->isOne() && !NoWrap) {
-    auto isUBOnWrap = [&]() {
-      // From no-self-wrap, we need to then prove no-(un)signed-wrap.  This
-      // follows trivially from the fact that every (un)signed-wrapped, but
-      // not self-wrapped value must be LT than the last value before
-      // (un)signed wrap.  Since we know that last value didn't exit, nor
-      // will any smaller one.
-      return canAssumeNoSelfWrap(IV);
-    };
-
+  } else if (!NoWrap) {
     // Avoid proven overflow cases: this will ensure that the backedge taken
-    // count will not generate any unsigned overflow. Relaxed no-overflow
-    // conditions exploit NoWrapFlags, allowing to optimize in presence of
-    // undefined behaviors like the case of C language.
-    if (canIVOverflowOnLT(RHS, Stride, IsSigned) && !isUBOnWrap())
+    // count will not generate any unsigned overflow.
+    if (canIVOverflowOnLT(RHS, Stride, IsSigned))
       return getCouldNotCompute();
   }
 
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
index 64306ac28cf275..b313842ad5e1a9 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
@@ -239,12 +239,6 @@ define void @neg_rhs_wrong_range(i16 %n.raw) mustprogress {
 ; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
 ; CHECK-NEXT:  Loop %for.body: Unpredictable constant max backedge-taken count.
 ; CHECK-NEXT:  Loop %for.body: Unpredictable symbolic max backedge-taken count.
-; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is ((-1 + (2 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) /u 2)
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {2,+,2}<%for.body> Added Flags:
-; CHECK-NEXT:  Loop %for.body: Predicated symbolic max backedge-taken count is ((-1 + (2 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16)))) /u 2)
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {2,+,2}<%for.body> Added Flags:
 ;
 entry:
   %n.and = and i16 %n.raw, 255
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
index 6f26a8a64e7188..30a095fd144fa4 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
@@ -377,7 +377,7 @@ define void @vscale_slt_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_ra
 ; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
 ; CHECK-NEXT:    --> {%A,+,(4 * vscale)}<%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %add = add i32 %i.05, %vscale
-; CHECK-NEXT:    --> {vscale,+,vscale}<%for.body> U: full-set S: full-set Exits: (vscale * (1 + ((-1 + %n) /u vscale))) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {vscale,+,vscale}<%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @vscale_slt_noflags
 ; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale)
 ; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 1073741822
@@ -415,7 +415,7 @@ define void @vscalex4_ult_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_
 ; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
 ; CHECK-NEXT:    --> {%A,+,(16 * vscale)}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale))) + %A) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %add = add i32 %i.05, %VF
-; CHECK-NEXT:    --> {(4 * vscale),+,(4 * vscale)}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale))))) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {(4 * vscale),+,(4 * vscale)}<%for.body> U: [8,-3) S: [-2147483648,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale))))) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @vscalex4_ult_noflags
 ; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale))
 ; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 536870910
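
For illustration only (not part of the patch; the function name and body below are made up, modelled on the existing tests in trip-count-implied-addrec.ll), here is a minimal IR sketch of the shape the new computeExitLimitFromICmp code targets: an affine IV with a known-positive, power-of-two step whose ult test against a loop-invariant bound controls the sole exit of a mustprogress loop. The power-of-two step lets the existing block infer no-self-wrap, and the new block should then be able to additionally mark the {2,+,2} addrec nuw, even though the IR carries no wrap flags, since a wrapped-but-not-self-wrapped value would be smaller than the last non-exiting value.

; Hypothetical reduced example, assuming the usual mustprogress-based
; finiteness reasoning applies as in the tests touched above.
define void @ult_stride2_sketch(ptr %p, i32 %n) mustprogress {
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %gep = getelementptr inbounds i32, ptr %p, i32 %iv
  store i32 0, ptr %gep
  ; %iv.next is {2,+,2}<%loop> with no nuw/nsw in the IR; the power-of-two
  ; step gives no-self-wrap, and the positive step plus the ult exit test
  ; below is the case the new code path handles.
  %iv.next = add i32 %iv, 2
  ; Loop-invariant RHS controlling the only exit of this mustprogress loop.
  %cmp = icmp ult i32 %iv.next, %n
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}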