Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][ESIMD] Rework -O0 behavior #12612

Merged
merged 4 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM,
OptimizationLevel Level) {
MPM.addPass(ESIMDVerifierPass(LangOpts.SYCLESIMDForceStatelessMem));
if (Level == OptimizationLevel::O0)
MPM.addPass(ESIMDRemoveOptnoneNoinlinePass());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am okay with this ESIMD change.

MPM.addPass(SYCLPropagateAspectsUsagePass(/*ExcludeAspects=*/{"fp64"}));
MPM.addPass(SYCLPropagateJointMatrixUsagePass());
});
Expand Down
5 changes: 4 additions & 1 deletion clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10316,8 +10316,11 @@ static void addArgs(ArgStringList &DstArgs, const llvm::opt::ArgList &Alloc,
// Partially copied from clang/lib/Frontend/CompilerInvocation.cpp
static std::string getSYCLPostLinkOptimizationLevel(const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
// Pass -O2 when the user passes -O0 due to IGC
// debugging limitation. Note this only effects
// ESIMD code.
if (A->getOption().matches(options::OPT_O0))
return "-O0";
return "-O2";

if (A->getOption().matches(options::OPT_Ofast))
return "-O3";
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Driver/sycl-offload-intelfpga-emu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,6 @@
// RUN: %clang_cl -### -fintelfpga -Zi -Od -Xs "-DFOO1 -DFOO2" %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-OPTS %s
// CHK-TOOLS-IMPLIED-OPTS: clang{{.*}} "-fsycl-is-device"{{.*}} "-O0"
// CHK-TOOLS-IMPLIED-OPTS: sycl-post-link{{.*}} "-O0"
// CHK-TOOLS-IMPLIED-OPTS: sycl-post-link{{.*}} "-O2"
// CHK-TOOLS-IMPLIED-OPTS: opencl-aot{{.*}} "--bo=-g -cl-opt-disable" "-DFOO1" "-DFOO2"

2 changes: 1 addition & 1 deletion clang/test/Driver/sycl-offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,7 @@
// only the last option is considered
// RUN: %clang -### -fsycl -O2 -O1 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O1
// RUN: %clang_cl -### -fsycl /O2 /O1 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-Os
// CHK-POST-LINK-OPT-LEVEL-O0: sycl-post-link{{.*}} "-O0"
// CHK-POST-LINK-OPT-LEVEL-O0: sycl-post-link{{.*}} "-O2"
// CHK-POST-LINK-OPT-LEVEL-O1: sycl-post-link{{.*}} "-O1"
// CHK-POST-LINK-OPT-LEVEL-O2: sycl-post-link{{.*}} "-O2"
// CHK-POST-LINK-OPT-LEVEL-O3: sycl-post-link{{.*}} "-O3"
Expand Down
9 changes: 9 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/ESIMD/LowerESIMD.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,14 @@ class PassRegistry;
/// like intrinsics to a form parsable by the ESIMD-aware SPIRV translator.
class SYCLLowerESIMDPass : public PassInfoMixin<SYCLLowerESIMDPass> {
public:
SYCLLowerESIMDPass(bool ModuleContainsScalar = true)
: ModuleContainsScalarCode(ModuleContainsScalar) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);

private:
bool prepareForAlwaysInliner(Module &M);
size_t runOnFunction(Function &F, SmallPtrSetImpl<Type *> &);
bool ModuleContainsScalarCode;
};

ModulePass *createSYCLLowerESIMDPass();
Expand Down Expand Up @@ -86,6 +89,12 @@ class ESIMDRemoveHostCodePass : public PassInfoMixin<ESIMDRemoveHostCodePass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
};

class ESIMDRemoveOptnoneNoinlinePass
: public PassInfoMixin<ESIMDRemoveOptnoneNoinlinePass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
};

} // namespace llvm

#endif // LLVM_SYCLLOWERIR_LOWERESIMD_H
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ MODULE_PASS("deadargelim-sycl", DeadArgumentEliminationSYCLPass())
MODULE_PASS("sycllowerwglocalmemory", SYCLLowerWGLocalMemoryPass())
MODULE_PASS("lower-esimd-kernel-attrs", SYCLFixupESIMDKernelWrapperMDPass())
MODULE_PASS("esimd-remove-host-code", ESIMDRemoveHostCodePass());
MODULE_PASS("esimd-remove-optnone-noinline", ESIMDRemoveOptnoneNoinlinePass());
MODULE_PASS("sycl-propagate-aspects-usage", SYCLPropagateAspectsUsagePass())
MODULE_PASS("sycl-propagate-joint-matrix-usage", SYCLPropagateJointMatrixUsagePass())
MODULE_PASS("sycl-add-opt-level-attribute", SYCLAddOptLevelAttributePass())
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/SYCLLowerIR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ add_llvm_component_library(LLVMSYCLLowerIR
ESIMD/ESIMDUtils.cpp
ESIMD/ESIMDVerifier.cpp
ESIMD/ESIMDRemoveHostCode.cpp
ESIMD/ESIMDRemoveOptnoneNoinline.cpp
ESIMD/LowerESIMD.cpp
ESIMD/LowerESIMDKernelAttrs.cpp
CleanupSYCLMetadata.cpp
Expand Down
43 changes: 43 additions & 0 deletions llvm/lib/SYCLLowerIR/ESIMD/ESIMDRemoveOptnoneNoinline.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//=-- ESIMDRemoveOptnoneNoinline.cpp - remove optnone/noinline for ESIMD --=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//=------------------------------------------------------------------------=//
// The GPU backend for ESIMD does not support debugging and we often see crashes
// or wrong answers if we do not optimize. Remove optnone and noinline from
// ESIMD functions so users at least can run their programs and get the
// right answer.
//=------------------------------------------------------------------------=//

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h"
#include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h"
#include "llvm/Support/Debug.h"

using namespace llvm;
using namespace llvm::esimd;

cl::opt<bool> AllowOptnoneNoinline(
"esimd-allow-optnone-noinline", llvm::cl::Optional, llvm::cl::Hidden,
llvm::cl::desc("Allow optnone and noinline."), llvm::cl::init(false));

PreservedAnalyses ESIMDRemoveOptnoneNoinlinePass::run(Module &M,
ModuleAnalysisManager &) {
if (AllowOptnoneNoinline)
return PreservedAnalyses::all();

// TODO: Remove this pass once VC supports debugging.
bool Modified = false;
for (auto &F : M.functions()) {
if (!isESIMD(F) || F.hasFnAttribute("CMGenxSIMT"))
continue;
F.removeFnAttr(Attribute::OptimizeNone);
F.removeFnAttr(Attribute::NoInline);
Modified = true;
}
return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
20 changes: 9 additions & 11 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1834,6 +1834,15 @@ bool SYCLLowerESIMDPass::prepareForAlwaysInliner(Module &M) {
continue;
}

// If we are splitting by ESIMD, we are guarenteed the entire
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this code still needed after adding the special pass that remove those 2 attributes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I tested many combinations of with/without different changes in this PR and everything we currently have is required.

// module only contains ESIMD code, so remove optnone/noinline
// from ALL functions as VC does not support these attributes
// and programs produce wrong answers or crash if they are kept.
if (!ModuleContainsScalarCode && !F.hasFnAttribute("CMGenxSIMT")) {
F.removeFnAttr(Attribute::NoInline);
F.removeFnAttr(Attribute::OptimizeNone);
}

// TODO: The next code and comment was placed to ESIMDLoweringPass
// 2 years ago, when GPU VC BE did not support function calls and
// required everything to be inlined right into the kernel unless
Expand Down Expand Up @@ -1939,17 +1948,6 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
SmallVector<CallInst *, 32> ESIMDIntrCalls;
SmallVector<Instruction *, 8> ToErase;

// The VC backend doesn't support debugging, and trying to use
// non-optimized code often produces crashes or wrong answers.
// The recommendation from the VC team was always optimize code,
// even if the user requested no optimization. We already drop
// debugging flags in the SYCL runtime, so also drop optnone and
// noinline here.
if (isESIMD(F) && F.hasFnAttribute(Attribute::OptimizeNone)) {
F.removeFnAttr(Attribute::OptimizeNone);
F.removeFnAttr(Attribute::NoInline);
}

for (Instruction &I : instructions(F)) {
if (auto CastOp = dyn_cast<llvm::CastInst>(&I)) {
llvm::Type *DstTy = CastOp->getDestTy();
Expand Down
13 changes: 11 additions & 2 deletions llvm/test/SYCLLowerIR/ESIMD/optnone.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; This ensures we remove optnone from ESIMD functions
; RUN: opt < %s -passes=LowerESIMD -S | FileCheck %s
; This ensures we remove optnone from ESIMD functions unless they are SIMT.
; RUN: opt < %s -passes=esimd-remove-optnone-noinline -S | FileCheck %s

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"
Expand All @@ -9,12 +9,21 @@ define dso_local spir_func void @esimd() #0 !sycl_explicit_simd !0 {
ret void
}

define dso_local spir_func void @esimd_simt() #1 !sycl_explicit_simd !0 {
; CHECK: void @esimd_simt() #[[#ESIMDSIMTAttr:]]
ret void
}

define dso_local spir_func void @sycl() #0 {
; CHECK: spir_func void @sycl() #[[#SYCLAttr:]]
ret void
}
; CHECK: attributes #[[#ESIMDAttr]] =
; CHECK-NOT: optnone
; CHECK: attributes #[[#ESIMDSIMTAttr]] = {{{.*}}optnone
; CHECK-SAME: "CMGenxSIMT"
; CHECK-SAME: }
; CHECK: attributes #[[#SYCLAttr]] = {{{.*}}optnone{{.*}}}
attributes #0 = { noinline optnone convergent }
attributes #1 = { noinline optnone convergent "CMGenxSIMT"}
!0 = !{}
32 changes: 32 additions & 0 deletions llvm/test/tools/sycl-post-link/sycl-esimd/optnone.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; This ensures we remove optnone from ESIMD functions unless they are SIMT or we didn't split ESIMD code out.
; RUN: sycl-post-link -split-esimd -lower-esimd -S < %s -o %t.table
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK,CHECK-ESIMD-SPLIT

; RUN: sycl-post-link -lower-esimd -S < %s -o %t1.table
; RUN: FileCheck %s -input-file=%t1_esimd_0.ll --check-prefixes CHECK,CHECK-NO-ESIMD-SPLIT
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"

define dso_local spir_func void @esimd() #0 !sycl_explicit_simd !0 {
; CHECK: void @esimd() #[[#ESIMDAttr:]]
ret void
}

define dso_local spir_func void @esimd_simt() #1 !sycl_explicit_simd !0 {
; CHECK: void @esimd_simt() #[[#ESIMDSIMTAttr:]]
ret void
}

define dso_local spir_func void @sycl() #0 {
; CHECK: spir_func void @sycl() #[[#ESIMDAttr]]
ret void
}
; CHECK-ESIMD-SPLIT: attributes #[[#ESIMDAttr]] =
; CHECK-ESIMD-SPLIT-NOT: optnone
; CHECK-NO-ESIMD-SPLIT: attributes #[[#ESIMDAttr]] = {{{.*}}optnone{{.*}}}
; CHECK: attributes #[[#ESIMDSIMTAttr]] = {{{.*}}optnone
; CHECK-SAME: "CMGenxSIMT"
; CHECK-SAME: }
attributes #0 = { noinline optnone convergent }
attributes #1 = { noinline optnone convergent "CMGenxSIMT"}
!0 = !{}
2 changes: 1 addition & 1 deletion llvm/tools/sycl-post-link/sycl-post-link.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,7 @@ bool lowerEsimdConstructs(module_split::ModuleDesc &MD) {
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

ModulePassManager MPM;
MPM.addPass(SYCLLowerESIMDPass{});
MPM.addPass(SYCLLowerESIMDPass(!SplitEsimd));

if (!OptLevelO0) {
FunctionPassManager FPM;
Expand Down
25 changes: 0 additions & 25 deletions sycl/test-e2e/ESIMD/slm_alloc_many_kernels_many_funcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,6 @@
// Y Z
//
// SLM offsets are expected to be:
// 1) no inlining case:
// --- Kernel0
// X - N1
// Y - N1 + X
// Z - max(N1 + X, N2)
// --- Kernel2
// X - 0 (not reachable, offset not updated in the result)
// Y - 0 (not reachable, offset not updated in the result)
// Z - max(N1 + X, N2)
// 2) forced inlining case:
// --- Kernel0
// X - N1
// Y - N1 + X
Expand All @@ -45,9 +35,6 @@
// X - 0 (not reachable, offset not updated in the result)
// Y - 0 (not reachable, offset not updated in the result)
// Z - N2
// Note the difference in SLM offset for Z in the inlining/no-inlining cases for
// Z scope. This is because inlining effectively splits Z scope when inlining
// into kernel0 and kernel2

#include <sycl/ext/intel/esimd.hpp>
#include <sycl/sycl.hpp>
Expand Down Expand Up @@ -157,26 +144,14 @@ int main(void) {
// Z - max(N1 + X, N2)
gold_arr0[x_off_ind] = SLM_N1;
gold_arr0[y_off_ind] = SLM_N1 + SLM_X;

// For kernel0 inline/no-inline case splits for Z:
#ifdef FORCE_INLINE
gold_arr0[z_off_ind] = SLM_N1;
#else
gold_arr0[z_off_ind] = std::max(SLM_N1 + SLM_X, SLM_N2);
#endif // FORCE_INLINE

// For kernel1 inline/no-inline results are the same for X and Y:
// X - 0
// Y - 0
gold_arr1[x_off_ind] = 0;
gold_arr1[y_off_ind] = 0;

// For kernel1 inline/no-inline case splits for Z:
#ifdef FORCE_INLINE
gold_arr1[z_off_ind] = SLM_N2;
#else
gold_arr1[z_off_ind] = std::max(SLM_N1 + SLM_X, SLM_N2);
#endif // FORCE_INLINE

T *test_arr = arr;
int err_cnt = 0;
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/esimd/genx_func_attr.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clangxx -O2 -fsycl -fsycl-device-only -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %t -o %t.table
// RUN: sycl-post-link -lower-esimd -O2 -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll

// Checks ESIMD intrinsic translation.
Expand Down
7 changes: 4 additions & 3 deletions sycl/test/esimd/intrins_trans.cpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
// RUN: %clangxx %clang_O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL

// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
// RUN: %clangxx %clang_O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=true -O0 -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS

// Checks ESIMD intrinsic translation with opaque pointers.
// NOTE: must be run in -O0, as optimizer optimizes away some of the code
// NOTE: must be run with %clang_O0, as optimizer optimizes away some of the
// code

#include <sycl/detail/image_ocl_types.hpp>
#include <sycl/ext/intel/esimd.hpp>
Expand Down
3 changes: 3 additions & 0 deletions sycl/test/esimd/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import platform

config.substitutions.append(("%clang_O0", "-O0 -mllvm -esimd-allow-optnone-noinline"))
Loading