Skip to content

Commit

Permalink
[SYCL][Fusion] Handle fusion leading to synchronization issues (#12538)
Browse files Browse the repository at this point in the history
Do not allow fusion when one of the kernels has an explicit local size
and it requires ID remapping, i.e., it has a different number of
dimensions w.r.t. the fused ND-range or different global size in
dimensions [2, N). In this case, two work-items belonging to the same
work-group may not belong to the same work-group in the fused ND-range.

Signed-off-by: Victor Perez <victor.perez@codeplay.com>

---------

Signed-off-by: Victor Perez <victor.perez@codeplay.com>
  • Loading branch information
victor-eds committed Feb 1, 2024
1 parent 435845b commit af448b0
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 7 deletions.
12 changes: 12 additions & 0 deletions sycl-fusion/common/lib/NDRangesHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,18 @@ jit_compiler::FusedNDRange::get(ArrayRef<NDRange> NDRanges) {
"Cannot fuse kernels whose fusion would "
"yield non-uniform work-group sizes");
}

// Work-items in the same work-group in the original ND-ranges must be in
// the same work-group in the fused one.
if (LocalSize && any_of(NDRanges, [&Fused](const NDRange &NDR) {
return NDR.hasSpecificLocalSize() && requireIDRemapping(Fused, NDR);
})) {
return createStringError(
inconvertibleErrorCode(),
"Cannot fuse kernels when any of the fused kernels with a specific "
"local size has different global sizes in dimensions [2, N) or "
"different number of dimensions");
}
}

return FusedNDRange{Fused, IsHeterogeneousList, NDRanges};
Expand Down
3 changes: 2 additions & 1 deletion sycl/doc/design/KernelFusionJIT.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,8 @@ These restrictions can be simplified to:

- No two local sizes specified by the nd-ranges will be different;
- No global id remapping is needed ([see](#work-item-remapping)) or all input offsets are 0;
- All the fused nd-ranges must have the same offset.
- All the fused nd-ranges must have the same offset;
- No global id remapping is needed for kernels specifying a local size.

As we can see, there is no restrictions in the number of dimensions or global sizes of the input nd-ranges.

Expand Down
33 changes: 27 additions & 6 deletions sycl/test-e2e/KernelFusion/abort_fusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,19 @@ enum class Internalization { None, Local, Private };

template <typename Range> size_t getSize(Range r);

template <> size_t getSize(range<1> r) { return r.size(); }
template <> size_t getSize(nd_range<1> r) {
template <int Dimensions> size_t getSize(range<Dimensions> r) {
return r.size();
}
template <int Dimensions> size_t getSize(nd_range<Dimensions> r) {
return r.get_global_range().size();
}

template <typename Kernel1Name, typename Kernel2Name, typename Range1,
typename Range2>
void performFusion(queue &q, Range1 R1, Range2 R2) {
using IndexTy1 = item<Range1::dimensions>;
using IndexTy2 = item<Range2::dimensions>;

int in[dataSize], tmp[dataSize], out[dataSize];

for (size_t i = 0; i < dataSize; ++i) {
Expand All @@ -43,15 +48,19 @@ void performFusion(queue &q, Range1 R1, Range2 R2) {
q.submit([&](handler &cgh) {
auto accIn = bIn.get_access(cgh);
auto accTmp = bTmp.get_access(cgh);
cgh.parallel_for<Kernel1Name>(
R1, [=](item<1> i) { accTmp[i] = accIn[i] + 5; });
cgh.parallel_for<Kernel1Name>(R1, [=](IndexTy1 i) {
size_t j = i.get_linear_id();
accTmp[j] = accIn[j] + 5;
});
});

q.submit([&](handler &cgh) {
auto accTmp = bTmp.get_access(cgh);
auto accOut = bOut.get_access(cgh);
cgh.parallel_for<Kernel2Name>(
R2, [=](id<1> i) { accOut[i] = accTmp[i] * 2; });
cgh.parallel_for<Kernel2Name>(R2, [=](IndexTy2 i) {
size_t j = i.get_linear_id();
accOut[j] = accTmp[j] * 2;
});
});

fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}});
Expand Down Expand Up @@ -117,5 +126,17 @@ int main() {
// CHECK-NEXT: Cannot fuse kernels whose fusion would yield non-uniform work-group sizes
// CHECK: COMPUTATION OK

// Scenario: Fusing two kernels that may lead to synchronization issues as two
// work-items in the same work-group may not be in the same work-group in the
// fused ND-range.
performFusion<class Kernel1_5, class Kernel2_5>(
q, nd_range<2>{range<2>{2, 2}, range<2>{2, 2}},
nd_range<2>{range<2>{4, 4}, range<2>{2, 2}});
// CHECK: ERROR: JIT compilation for kernel fusion failed with message:
// CHECK-NEXT: Illegal ND-range combination
// CHECK-NEXT: Detailed information:
// CHECK-NEXT: Cannot fuse kernels when any of the fused kernels with a specific local size has different global sizes in dimensions [2, N) or different number of dimensions
// CHECK: COMPUTATION OK

return 0;
}

0 comments on commit af448b0

Please sign in to comment.