Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][ESIMD] Report an error when slm_init is called more than once in the kernel #12804

Merged
merged 17 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1750,6 +1750,83 @@ void lowerGlobalsToVector(Module &M) {

} // namespace

static void checkSLMInit(Module &M) {
SmallPtrSet<const Function *, 8u> SLMInitKernels;
SmallPtrSet<const Function *, 8u> LocalAccessorKernels;

for (auto &F : M) {
if (!isSlmInit(F)) {
if (!llvm::esimd::isESIMDKernel(F))
continue;
unsigned Idx = 0;
for (const Argument &Arg : F.args()) {
if (Arg.getType()->isPointerTy()) {
auto *KernelArgAccPtrs = F.getMetadata("kernel_arg_accessor_ptr");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we send an email to the CFE/SYCL language team as a non-blocking follow-up to see if they have any ideas on improving this check by doing it somewhere else (Sema?). Relying on the metadata should work but it seems a bit indirect and possibly flaky, but I don't know how to do any better today.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do, although it looks like a standard way to mark passing accessors to the kernel from FE


if (KernelArgAccPtrs) {
auto *AccMD =
cast<ConstantAsMetadata>(KernelArgAccPtrs->getOperand(Idx));
auto AccMDVal = cast<ConstantInt>(AccMD->getValue())->getValue();
bool IsAcc = static_cast<unsigned>(AccMDVal.getZExtValue());

constexpr unsigned LocalAS{3};
if (IsAcc && cast<PointerType>(Arg.getType())->getAddressSpace() ==
LocalAS) {
LocalAccessorKernels.insert(&F);
break;
}
}
}
Idx++;
}
} else {
for (User *U : F.users()) {
auto *FCall = dyn_cast<CallInst>(U);
if (FCall && FCall->getCalledFunction() == &F) {
Function *GenF = FCall->getFunction();
SmallPtrSet<Function *, 32> Visited;
sycl::utils::traverseCallgraphUp(
GenF,
[&](Function *GraphNode) {
if (llvm::esimd::isESIMDKernel(*GraphNode)) {
if (SLMInitKernels.contains(GraphNode)) {
StringRef KernelName = GraphNode->getName();
std::string ErrorMsg =
std::string("slm_init is called more than once "
"from kernel '") +
demangle(KernelName.str()) + "'.";
GraphNode->getContext().emitError(ErrorMsg);
} else {
SLMInitKernels.insert(GraphNode);
}
}
},
Visited, false);
bool VisitedKernel = false;
for (const Function *Caller : Visited) {
if (llvm::esimd::isESIMDKernel(*Caller)) {
VisitedKernel = true;
break;
}
}
if (!VisitedKernel) {
F.getContext().emitError(
"slm_init must be called directly from ESIMD kernel.");
}
} else {
F.getContext().emitError(
"slm_init can only be used as a direct call.");
}
}
}
for (const Function *Kernel : LocalAccessorKernels) {
if (SLMInitKernels.contains(Kernel))
F.getContext().emitError(
"slm_init can not be used with local accessors.");
}
}
}

bool SYCLLowerESIMDPass::prepareForAlwaysInliner(Module &M) {

auto markAlwaysInlined = [](Function &F) -> bool {
Expand Down Expand Up @@ -1908,6 +1985,10 @@ static void fixFunctionReadWriteAttributes(Module &M) {

PreservedAnalyses SYCLLowerESIMDPass::run(Module &M,
ModuleAnalysisManager &MAM) {

// Check validity of slm_init calls.
checkSLMInit(M);

// AlwaysInlinerPass is required for correctness.
bool ForceInline = prepareForAlwaysInliner(M);
if (ForceInline) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,6 @@ bool testLocalAccSLM(queue Q, uint32_t Groups,
auto OutPtr = Out.data();

CGH.parallel_for(Range, [=](sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL {
constexpr uint32_t SLMSize = (GroupSize * N) * sizeof(T);
slm_init<SLMSize>();

uint16_t GlobalID = ndi.get_global_id(0);
uint16_t LocalID = ndi.get_local_id(0);
uint32_t LocalElemOffset = LocalID * N * sizeof(T);
Expand Down
22 changes: 22 additions & 0 deletions sycl/test/esimd/slm_init_check.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// RUN: not %clangxx -fsycl %s 2>&1 | FileCheck %s

// This test verifies more than 1 call to slm_init triggers an error.

#include <iostream>
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/sycl.hpp>

using namespace sycl;
using namespace sycl::ext::intel::esimd;

int main() {
queue Q;
nd_range<1> NDR{range<1>{2}, range<1>{2}};
Q.parallel_for(NDR, [=](nd_item<1> NDI) SYCL_ESIMD_KERNEL {
slm_init(1024);
slm_init(1024);
}).wait();
// CHECK: error: slm_init is called more than once from kernel 'typeinfo name for main::'lambda'(sycl::_V1::nd_item<1>)'.

return 0;
}
34 changes: 34 additions & 0 deletions sycl/test/esimd/slm_init_invoke_simd.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// This test verifies call to slm_init from a function called through
// invoke_simd triggers an error.

// RUN: not %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s 2>&1 | FileCheck %s

#include <sycl/ext/intel/esimd.hpp>
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
#include <sycl/ext/oneapi/experimental/uniform.hpp>
#include <sycl/sycl.hpp>

#include <functional>
#include <iostream>
#include <type_traits>

using namespace sycl::ext::oneapi::experimental;
using namespace sycl;
namespace esimd = sycl::ext::intel::esimd;

SYCL_EXTERNAL
[[intel::device_indirectly_callable]] void __regcall SIMD_CALLEE_VOID()
SYCL_ESIMD_FUNCTION {
esimd::slm_init<1024>();
}

int main() {
queue Q;
nd_range<1> NDR{range<1>{2}, range<1>{2}};
Q.parallel_for(NDR, [=](nd_item<1> NDI) [[intel::reqd_sub_group_size(16)]] {
sub_group sg = NDI.get_sub_group();
invoke_simd(sg, SIMD_CALLEE_VOID);
}).wait();
return 0;
}
// CHECK: slm_init must be called directly from ESIMD kernel.
29 changes: 29 additions & 0 deletions sycl/test/esimd/slm_init_local_accessor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// RUN: %clangxx -fsycl %s

// This test verifies usage of slm_init and local_accessor in different kernels
// passes.

#include <iostream>
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/sycl.hpp>

using namespace sycl;
using namespace sycl::ext::intel::esimd;

int main() {
queue Q;
nd_range<1> NDR{range<1>{2}, range<1>{2}};
Q.submit([&](handler &CGH) {
auto InAcc = local_accessor<int, 1>(5, CGH);
CGH.parallel_for(NDR, [=](nd_item<1> NDI) SYCL_ESIMD_KERNEL {
scalar_load<int>(InAcc, 0);
});
}).wait();

Q.submit([&](handler &CGH) {
CGH.parallel_for(NDR, [=](nd_item<1> NDI)
SYCL_ESIMD_KERNEL { slm_init(1024); });
}).wait();

return 0;
}
25 changes: 25 additions & 0 deletions sycl/test/esimd/slm_init_local_accessor_parameter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// RUN: not %clangxx -fsycl %s 2>&1 | FileCheck %s

// This test verifies usage of slm_init and local_accessor triggers an error.

#include <iostream>
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/sycl.hpp>

using namespace sycl;
using namespace sycl::ext::intel::esimd;

int main() {
queue Q;
nd_range<1> NDR{range<1>{2}, range<1>{2}};
Q.submit([&](handler &CGH) {
auto InAcc = local_accessor<int, 1>(5, CGH);
CGH.parallel_for(NDR, [=](nd_item<1> NDI) SYCL_ESIMD_KERNEL {
slm_init(1024);
scalar_load<int>(InAcc, 0);
});
}).wait();
// CHECK: error: slm_init can not be used with local accessors.

return 0;
}
25 changes: 25 additions & 0 deletions sycl/test/esimd/slm_init_local_accessor_subscript.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// RUN: not %clangxx -fsycl %s 2>&1 | FileCheck %s

// This test verifies usage of slm_init and local_accessor triggers an error.

#include <iostream>
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/sycl.hpp>

using namespace sycl;
using namespace sycl::ext::intel::esimd;

int main() {
queue Q;
nd_range<1> NDR{range<1>{2}, range<1>{2}};
Q.submit([&](handler &CGH) {
auto InAcc = local_accessor<int, 1>(5, CGH);
CGH.parallel_for(NDR, [=](nd_item<1> NDI) SYCL_ESIMD_KERNEL {
slm_init(1024);
InAcc[0] = 5;
});
}).wait();
// CHECK: error: slm_init can not be used with local accessors.

return 0;
}
31 changes: 31 additions & 0 deletions sycl/test/esimd/slm_init_noinline_check.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// RUN: not %clangxx -fsycl %s 2>&1 | FileCheck %s

// This test verifies call to slm_init from a function marked as
// noinline triggers an error.

#include <iostream>
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/sycl.hpp>

using namespace sycl;
using namespace sycl::ext::intel::esimd;

#ifdef _MSC_VER
#define __SYCL_NOINLINE __declspec(noinline)
#else
#define __SYCL_NOINLINE __attribute__((noinline))
#endif

__SYCL_NOINLINE void bar() { slm_init(1024); }
__SYCL_NOINLINE void foo() {
slm_init(1024);
bar();
}

int main() {
queue Q;
nd_range<1> NDR{range<1>{2}, range<1>{2}};
Q.parallel_for(NDR, [=](nd_item<1> NDI) SYCL_ESIMD_KERNEL { foo(); }).wait();
return 0;
}
// CHECK: error: slm_init is called more than once from kernel 'typeinfo name for main::'lambda'(sycl::_V1::nd_item<1>)'.
Loading