Skip to content

Commit

Permalink
[SYCL][Fusion] Handle GEPs that were canonicalized to byte offsets (#…
Browse files Browse the repository at this point in the history
…12557)

Upstream now canonicalizes constant GEPs to represent byte offsets, i.e.
using `i8` as source element type. This PR adapts the internalization
pass to this change by also remapping GEPs with a constant offset, if
that offset is a multiple of the internalized accessor's element size.

Signed-off-by: Julian Oppermann <julian.oppermann@codeplay.com>
  • Loading branch information
jopperm authored Jan 31, 2024
1 parent b8f9c8b commit 470e378
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 14 deletions.
56 changes: 47 additions & 9 deletions sycl-fusion/passes/internalization/Internalization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,21 @@ static void updateInternalizationMD(Function *F, StringRef Kind,
}
}

///
/// If \p GEPI represents a constant offset in bytes, return it, otherwise
/// return an empty value.
static std::optional<unsigned> getConstantByteOffset(GetElementPtrInst *GEPI,
const DataLayout &DL) {
MapVector<Value *, APInt> VariableOffsets;
auto IW = DL.getIndexSizeInBits(GEPI->getPointerAddressSpace());
APInt ConstantOffset = APInt::getZero(IW);
if (GEPI->collectOffset(DL, IW, VariableOffsets, ConstantOffset) &&
VariableOffsets.empty()) {
return ConstantOffset.getZExtValue();
}
return {};
}

///
/// When performing internalization, GEP instructions must be remapped, as the
/// address space has changed from N to N / LocalSize.
Expand All @@ -200,6 +215,25 @@ static void remap(GetElementPtrInst *GEPI, const PromotionInfo &PromInfo) {
return;
}

// GEPs with constant offset may be marked for remapping even if their element
// size differs from the accessor's element size. However we know that the
// offset is a multiple of the latter. Rewrite the instruction to represent a
// number of _elements_ to make it compatible with other GEPs in the current
// chain.
auto &DL = GEPI->getModule()->getDataLayout();
auto SrcElemTySz = DL.getTypeAllocSize(GEPI->getSourceElementType());
if (SrcElemTySz != PromInfo.ElemSize) {
auto COff = getConstantByteOffset(GEPI, DL);
// This is special case #2 in `getGEPKind`.
assert(COff.has_value() && *COff % PromInfo.ElemSize == 0 &&
GEPI->getNumIndices() == 1);
auto *IntTypeWithSameWidthAsAccessorElementType =
Builder.getIntNTy(PromInfo.ElemSize * 8);
GEPI->setSourceElementType(IntTypeWithSameWidthAsAccessorElementType);
GEPI->setResultElementType(IntTypeWithSameWidthAsAccessorElementType);
GEPI->idx_begin()->set(Builder.getInt64(*COff / PromInfo.ElemSize));
}

// An individual `GEP(ptr, offset)` is rewritten as
// `GEP(ptr, offset % LocalSize)`.
//
Expand Down Expand Up @@ -302,15 +336,19 @@ static int getGEPKind(GetElementPtrInst *GEPI, const PromotionInfo &PromInfo) {
return Kind;
}

// Check whether `GEPI` adds a constant offset, e.g. a byte offset to address
// into a padded structure, smaller than the element size.
MapVector<Value *, APInt> VariableOffsets;
auto IW = DL.getIndexSizeInBits(GEPI->getPointerAddressSpace());
APInt ConstantOffset = APInt::getZero(IW);
if (GEPI->collectOffset(DL, IW, VariableOffsets, ConstantOffset) &&
VariableOffsets.empty() &&
ConstantOffset.getZExtValue() < PromInfo.ElemSize) {
return ADDRESSES_INTO_AGGREGATE;
// We can handle a mismatch between `GEPI`'s element size and the accessors
// element size if `GEPI` represents a constant offset.
if (auto COff = getConstantByteOffset(GEPI, DL)) {
if (*COff < PromInfo.ElemSize) {
// Special case #1: The offset is less than the element size, hence we're
// addressing into an aggregrate and no remapping is required.
return ADDRESSES_INTO_AGGREGATE;
}
if (*COff % PromInfo.ElemSize == 0 && GEPI->getNumIndices() == 1) {
// Special case #2: The offset is a multiple of the element size, meaning
// `GEPI` selects an element and is subject to remapping.
return NEEDS_REMAPPING;
}
}

// We don't know what `GEPI` addresses; bail out.
Expand Down
15 changes: 11 additions & 4 deletions sycl-fusion/test/internalization/promote-private-non-unit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,17 @@ define spir_kernel void @fused_0(ptr addrspace(1) nocapture align 16 %KernelOne_
; CHECK: [[TMP4:%.*]] = tail call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
; CHECK: [[MUL:%.*]] = mul nuw nsw i64 [[TMP4]], 3
; CHECK: [[ADD:%.*]] = add nuw nsw i64 [[MUL]], 1
; CHECK: [[TMP6:%.*]] = add i64 [[TMP2]], [[ADD]]
; CHECK: [[TMP6:%.*]] = add i64 [[TMP2]], [[MUL]]
; CHECK: [[TMP7:%.*]] = urem i64 [[TMP6]], 3
; CHECK: [[ARRAYIDX_1:%.*]] = getelementptr inbounds %struct.MyStruct, ptr [[TMP1]], i64 [[TMP7]]
; CHECK: [[ADDA:%.*]] = add i64 [[TMP7]], 1
; CHECK: [[TMP7A:%.*]] = urem i64 [[ADDA]], 3

; COM: This constant i8-GEP was rewritten to encode an _element_ offset, and subsequently remapped.
; CHECK: [[ARRAYIDX_1A:%.*]] = getelementptr inbounds i256, ptr [[TMP1]], i64 [[TMP7A]]

; COM: This i8-GEP _was_ not remapped because it addresses into a single MyStruct element
; CHECK: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_1]], i64 20
; CHECK: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_1A]], i64 20

; CHECK: store i32 {{.*}}, ptr [[ARRAYIDX_2]], align 4
; CHECK: [[TMP8:%.*]] = add i64 [[TMP3]], [[ADD]]
Expand All @@ -66,8 +71,10 @@ entry:
%add.j2 = add nuw nsw i64 %mul.j2, 1
%arrayidx.j2 = getelementptr inbounds i32, ptr addrspace(1) %add.ptr.i35.i, i64 %add.j2
%1 = load i32, ptr addrspace(1) %arrayidx.j2, align 4
%arrayidx.i54.i = getelementptr inbounds %struct.MyStruct, ptr addrspace(1) %add.ptr.j2, i64 %add.j2
%arrayidx.j3 = getelementptr inbounds i8, ptr addrspace(1) %arrayidx.i54.i, i64 20
%arrayidx.i54.i = getelementptr inbounds %struct.MyStruct, ptr addrspace(1) %add.ptr.j2, i64 %mul.j2
; Mimic %add.j2 by artificially representing it as a constant byte offset (sizeof(MyStruct)==32 byte)
%arrayidx.plus.one.element = getelementptr inbounds i8, ptr addrspace(1) %arrayidx.i54.i, i64 32
%arrayidx.j3 = getelementptr inbounds i8, ptr addrspace(1) %arrayidx.plus.one.element, i64 20
store i32 %1, ptr addrspace(1) %arrayidx.j3, align 4
%conv.j2 = trunc i32 %1 to i8
%arrayidx.i70.i = getelementptr inbounds i8, ptr addrspace(1) %add.ptr.i44.i, i64 %add.j2
Expand Down
1 change: 0 additions & 1 deletion sycl/test-e2e/KernelFusion/internalize_vfunc.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// RUN: %{build} -fsycl-embed-ir -O2 -o %t.out
// RUN: %{run} %t.out
// XFAIL: hip,cuda

// Test complete fusion with private internalization specified on the
// accessors for a device kernel with sycl::vec::load and sycl::vec::store.
Expand Down

0 comments on commit 470e378

Please sign in to comment.