Skip to content

Commit

Permalink
[LAA] Remove loop-invariant check added in 234cc40.
Browse files Browse the repository at this point in the history
234cc40 introduced a loop-invariance check to limit the
compile-time impact of the newly added checks.

This patch removes the restriction and avoids extra compile-time impact
by sinking the check to exits where we would return an unknown
dependence. This notably reduces the amount the extra checks are
executed while not missing out on any improvements from them.

https://llvm-compile-time-tracker.com/compare.php?from=33e7cd6ff23f6c904314d17c68dc58168fd32d09&to=7c55e66d4f31ce8262b90c119a8e84e1f9515ff1&stat=instructions:u
  • Loading branch information
fhahn committed Aug 26, 2024
1 parent b0fbfbb commit a800533
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 40 deletions.
81 changes: 58 additions & 23 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1937,27 +1937,6 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
<< ": " << *Dist << "\n");

// Check if we can prove that Sink only accesses memory after Src's end or
// vice versa. At the moment this is limited to cases where either source or
// sink are loop invariant to avoid compile-time increases. This is not
// required for correctness.
if (SE.isLoopInvariant(Src, InnermostLoop) ||
SE.isLoopInvariant(Sink, InnermostLoop)) {
const auto &[SrcStart, SrcEnd] =
getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
const auto &[SinkStart, SinkEnd] =
getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
if (!isa<SCEVCouldNotCompute>(SrcStart) &&
!isa<SCEVCouldNotCompute>(SrcEnd) &&
!isa<SCEVCouldNotCompute>(SinkStart) &&
!isa<SCEVCouldNotCompute>(SinkEnd)) {
if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart))
return MemoryDepChecker::Dependence::NoDep;
if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart))
return MemoryDepChecker::Dependence::NoDep;
}
}

// Need accesses with constant strides and the same direction for further
// dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
// similar code or pointer arithmetic that could wrap in the address space.
Expand Down Expand Up @@ -2003,12 +1982,45 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const MemAccessInfo &B, unsigned BIdx) {
assert(AIdx < BIdx && "Must pass arguments in program order");

// Check if we can prove that Sink only accesses memory after Src's end or
// vice versa. The helper is used to perform the checks only on the exit paths
// where it helps to improve the analysis result.
auto CheckCompletelyBeforeOrAfter = [&]() {
auto *APtr = A.getPointer();
auto *BPtr = B.getPointer();

Type *ATy = getLoadStoreType(InstMap[AIdx]);
Type *BTy = getLoadStoreType(InstMap[BIdx]);

const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);

const auto &[SrcStart, SrcEnd] =
getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
if (isa<SCEVCouldNotCompute>(SrcStart) || isa<SCEVCouldNotCompute>(SrcEnd))
return false;

const auto &[SinkStart, SinkEnd] =
getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
if (isa<SCEVCouldNotCompute>(SinkStart) ||
isa<SCEVCouldNotCompute>(SinkEnd))
return false;

auto &SE = *PSE.getSE();
return SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart) ||
SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart);
};

// Get the dependence distance, stride, type size and what access writes for
// the dependence between A and B.
auto Res =
getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
if (std::holds_alternative<Dependence::DepType>(Res))
if (std::holds_alternative<Dependence::DepType>(Res)) {
if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
CheckCompletelyBeforeOrAfter())
return Dependence::NoDep;
return std::get<Dependence::DepType>(Res);
}

auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
std::get<DepDistanceStrideAndSizeInfo>(Res);
Expand All @@ -2017,6 +2029,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
std::optional<uint64_t> CommonStride =
StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt;
if (isa<SCEVCouldNotCompute>(Dist)) {
if (CheckCompletelyBeforeOrAfter())
return Dependence::NoDep;

// TODO: Relax requirement that there is a common stride to retry with
// non-constant distance dependencies.
FoundNonConstantDistanceDependence |= CommonStride.has_value();
Expand Down Expand Up @@ -2068,6 +2083,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Write to the same location with the same size.
return Dependence::Forward;
}
assert(!CheckCompletelyBeforeOrAfter() &&
"unexpectedly proved no dependence");
LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
"different type sizes\n");
return Dependence::Unknown;
Expand All @@ -2089,6 +2106,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// did not set it when strides were different but there is no inherent
// reason to.
FoundNonConstantDistanceDependence |= CommonStride.has_value();
if (CheckCompletelyBeforeOrAfter())
return Dependence::NoDep;
return Dependence::Unknown;
}
if (!HasSameSize ||
Expand All @@ -2108,6 +2127,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Below we only handle strictly positive distances.
if (MinDistance <= 0) {
FoundNonConstantDistanceDependence |= CommonStride.has_value();
if (CheckCompletelyBeforeOrAfter())
return Dependence::NoDep;

return Dependence::Unknown;
}

Expand All @@ -2124,13 +2146,18 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
}

if (!HasSameSize) {
if (CheckCompletelyBeforeOrAfter())
return Dependence::NoDep;
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
"different type sizes\n");
return Dependence::Unknown;
}

if (!CommonStride)
if (!CommonStride) {
if (CheckCompletelyBeforeOrAfter())
return Dependence::NoDep;
return Dependence::Unknown;
}

// Bail out early if passed-in parameters make vectorization not feasible.
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
Expand Down Expand Up @@ -2178,6 +2205,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// dependence distance and the distance may be larger at runtime (and safe
// for vectorization). Classify it as Unknown, so we re-try with runtime
// checks.
//
if (CheckCompletelyBeforeOrAfter())
return Dependence::NoDep;

return Dependence::Unknown;
}
LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
Expand All @@ -2190,6 +2221,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (MinDistanceNeeded > MinDepDistBytes) {
LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
<< MinDistanceNeeded << " size in bytes\n");
assert(!CheckCompletelyBeforeOrAfter() &&
"unexpectedly proved no dependence");
return Dependence::Backward;
}

Expand Down Expand Up @@ -2237,6 +2270,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// For non-constant distances, we checked the lower bound of the dependence
// distance and the distance may be larger at runtime (and safe for
// vectorization). Classify it as Unknown, so we re-try with runtime checks.
assert(!CheckCompletelyBeforeOrAfter() &&
"unexpectedly proved no dependence");
return Dependence::Unknown;
}

Expand Down
10 changes: 1 addition & 9 deletions llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -130,16 +130,8 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
; CHECK-LABEL: 'neg_dist_dep_type_size_equivalence'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
; CHECK-EMPTY:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 ->
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
; CHECK-EMPTY:
; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
; CHECK-NEXT: store double %val, ptr %gep.iv.101.i64, align 8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,8 @@ exit:
define void @different_non_constant_strides_known_backward_distance_larger_than_trip_count(ptr %A) {
; CHECK-LABEL: 'different_non_constant_strides_known_backward_distance_larger_than_trip_count'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %l = load i32, ptr %gep, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/global_alias.ll
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ for.end: ; preds = %for.body
; return Foo.A[a];
; }
; CHECK-LABEL: define i32 @mayAlias01(
; CHECK-NOT: add nsw <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: ret

define i32 @mayAlias01(i32 %a) nounwind {
Expand Down Expand Up @@ -536,7 +536,7 @@ for.end: ; preds = %for.body
; return Foo.A[a];
; }
; CHECK-LABEL: define i32 @mayAlias02(
; CHECK-NOT: add nsw <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: ret

define i32 @mayAlias02(i32 %a) nounwind {
Expand Down

0 comments on commit a800533

Please sign in to comment.