Skip to content

Commit

Permalink
[SYCL][ESIMD] Rework -O0 behavior
Browse files Browse the repository at this point in the history
Signed-off-by: Sarnie, Nick <nick.sarnie@intel.com>
  • Loading branch information
sarnex committed Feb 7, 2024
1 parent c320f3b commit 83e79ed
Show file tree
Hide file tree
Showing 16 changed files with 125 additions and 46 deletions.
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -955,6 +955,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM,
OptimizationLevel Level) {
MPM.addPass(ESIMDVerifierPass(LangOpts.SYCLESIMDForceStatelessMem));
if (Level == OptimizationLevel::O0)
MPM.addPass(ESIMDRemoveOptnoneNoinlinePass());
MPM.addPass(SYCLPropagateAspectsUsagePass(/*ExcludeAspects=*/{"fp64"}));
MPM.addPass(SYCLPropagateJointMatrixUsagePass());
});
Expand Down
5 changes: 4 additions & 1 deletion clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10308,8 +10308,11 @@ static void addArgs(ArgStringList &DstArgs, const llvm::opt::ArgList &Alloc,
// Partially copied from clang/lib/Frontend/CompilerInvocation.cpp
static std::string getSYCLPostLinkOptimizationLevel(const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
// Pass -O2 when the user passes -O0 due to IGC
// debugging limitation. Note this only effects
// ESIMD code.
if (A->getOption().matches(options::OPT_O0))
return "-O0";
return "-O2";

if (A->getOption().matches(options::OPT_Ofast))
return "-O3";
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Driver/sycl-offload-intelfpga-emu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,6 @@
// RUN: %clang_cl -### -fintelfpga -Zi -Od -Xs "-DFOO1 -DFOO2" %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-OPTS %s
// CHK-TOOLS-IMPLIED-OPTS: clang{{.*}} "-fsycl-is-device"{{.*}} "-O0"
// CHK-TOOLS-IMPLIED-OPTS: sycl-post-link{{.*}} "-O0"
// CHK-TOOLS-IMPLIED-OPTS: sycl-post-link{{.*}} "-O2"
// CHK-TOOLS-IMPLIED-OPTS: opencl-aot{{.*}} "--bo=-g -cl-opt-disable" "-DFOO1" "-DFOO2"

2 changes: 1 addition & 1 deletion clang/test/Driver/sycl-offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,7 @@
// only the last option is considered
// RUN: %clang -### -fsycl -O2 -O1 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O1
// RUN: %clang_cl -### -fsycl /O2 /O1 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-Os
// CHK-POST-LINK-OPT-LEVEL-O0: sycl-post-link{{.*}} "-O0"
// CHK-POST-LINK-OPT-LEVEL-O0: sycl-post-link{{.*}} "-O2"
// CHK-POST-LINK-OPT-LEVEL-O1: sycl-post-link{{.*}} "-O1"
// CHK-POST-LINK-OPT-LEVEL-O2: sycl-post-link{{.*}} "-O2"
// CHK-POST-LINK-OPT-LEVEL-O3: sycl-post-link{{.*}} "-O3"
Expand Down
9 changes: 9 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/ESIMD/LowerESIMD.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,14 @@ class PassRegistry;
/// like intrinsics to a form parsable by the ESIMD-aware SPIRV translator.
class SYCLLowerESIMDPass : public PassInfoMixin<SYCLLowerESIMDPass> {
public:
SYCLLowerESIMDPass(bool ModuleContainsScalar = true)
: ModuleContainsScalarCode(ModuleContainsScalar) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);

private:
bool prepareForAlwaysInliner(Module &M);
size_t runOnFunction(Function &F, SmallPtrSetImpl<Type *> &);
bool ModuleContainsScalarCode;
};

ModulePass *createSYCLLowerESIMDPass();
Expand Down Expand Up @@ -86,6 +89,12 @@ class ESIMDRemoveHostCodePass : public PassInfoMixin<ESIMDRemoveHostCodePass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
};

class ESIMDRemoveOptnoneNoinlinePass
: public PassInfoMixin<ESIMDRemoveOptnoneNoinlinePass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
};

} // namespace llvm

#endif // LLVM_SYCLLOWERIR_LOWERESIMD_H
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ MODULE_PASS("deadargelim-sycl", DeadArgumentEliminationSYCLPass())
MODULE_PASS("sycllowerwglocalmemory", SYCLLowerWGLocalMemoryPass())
MODULE_PASS("lower-esimd-kernel-attrs", SYCLFixupESIMDKernelWrapperMDPass())
MODULE_PASS("esimd-remove-host-code", ESIMDRemoveHostCodePass());
MODULE_PASS("esimd-remove-optnone-noinline", ESIMDRemoveOptnoneNoinlinePass());
MODULE_PASS("sycl-propagate-aspects-usage", SYCLPropagateAspectsUsagePass())
MODULE_PASS("sycl-propagate-joint-matrix-usage", SYCLPropagateJointMatrixUsagePass())
MODULE_PASS("sycl-add-opt-level-attribute", SYCLAddOptLevelAttributePass())
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/SYCLLowerIR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ add_llvm_component_library(LLVMSYCLLowerIR
ESIMD/ESIMDUtils.cpp
ESIMD/ESIMDVerifier.cpp
ESIMD/ESIMDRemoveHostCode.cpp
ESIMD/ESIMDRemoveOptnoneNoinline.cpp
ESIMD/LowerESIMD.cpp
ESIMD/LowerESIMDKernelAttrs.cpp
CleanupSYCLMetadata.cpp
Expand Down
45 changes: 45 additions & 0 deletions llvm/lib/SYCLLowerIR/ESIMD/ESIMDRemoveOptnoneNoinline.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//=-- ESIMDRemoveOptnoneNoinline.cpp - remove optnone/noinline for ESIMD --=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//=------------------------------------------------------------------------=//
// The GPU backend for ESIMD does not support debugging and we often see crashes
// or wrong answers if we do not optimize. Remove optnone and noinline from
// ESIMD functions so users at least can run their programs and get the
// right answer.
//=------------------------------------------------------------------------=//

#define DEBUG_TYPE "ESIMDRemoveOptnoneNoinlinePass"

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h"
#include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h"
#include "llvm/Support/Debug.h"

using namespace llvm;
using namespace llvm::esimd;

cl::opt<bool> AllowOptnoneNoinline(
"esimd-allow-optnone-noinline", llvm::cl::Optional, llvm::cl::Hidden,
llvm::cl::desc("Allow optnone and noinline."), llvm::cl::init(false));

PreservedAnalyses ESIMDRemoveOptnoneNoinlinePass::run(Module &M,
ModuleAnalysisManager &) {
if (AllowOptnoneNoinline)
return PreservedAnalyses::all();

// TODO: Remove this pass once VC supports debugging.
bool Modified = false;
for (auto &F : M.functions()) {
if (!isESIMD(F) || F.hasFnAttribute("CMGenxSIMT"))
continue;
F.removeFnAttr(Attribute::OptimizeNone);
F.removeFnAttr(Attribute::NoInline);
Modified = true;
}
return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
20 changes: 9 additions & 11 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1803,6 +1803,15 @@ bool SYCLLowerESIMDPass::prepareForAlwaysInliner(Module &M) {
continue;
}

// If we are splitting by ESIMD, we are guarenteed the entire
// module only contains ESIMD code, so remove optnone/noinline
// from ALL functions as VC does not support these attributes
// and programs produce wrong answers or crash if they are kept.
if (!ModuleContainsScalarCode && !F.hasFnAttribute("CMGenxSIMT")) {
F.removeFnAttr(Attribute::NoInline);
F.removeFnAttr(Attribute::OptimizeNone);
}

// TODO: The next code and comment was placed to ESIMDLoweringPass
// 2 years ago, when GPU VC BE did not support function calls and
// required everything to be inlined right into the kernel unless
Expand Down Expand Up @@ -1908,17 +1917,6 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
SmallVector<CallInst *, 32> ESIMDIntrCalls;
SmallVector<Instruction *, 8> ToErase;

// The VC backend doesn't support debugging, and trying to use
// non-optimized code often produces crashes or wrong answers.
// The recommendation from the VC team was always optimize code,
// even if the user requested no optimization. We already drop
// debugging flags in the SYCL runtime, so also drop optnone and
// noinline here.
if (isESIMD(F) && F.hasFnAttribute(Attribute::OptimizeNone)) {
F.removeFnAttr(Attribute::OptimizeNone);
F.removeFnAttr(Attribute::NoInline);
}

for (Instruction &I : instructions(F)) {
if (auto CastOp = dyn_cast<llvm::CastInst>(&I)) {
llvm::Type *DstTy = CastOp->getDestTy();
Expand Down
13 changes: 11 additions & 2 deletions llvm/test/SYCLLowerIR/ESIMD/optnone.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; This ensures we remove optnone from ESIMD functions
; RUN: opt < %s -passes=LowerESIMD -S | FileCheck %s
; This ensures we remove optnone from ESIMD functions unless they are SIMT.
; RUN: opt < %s -passes=esimd-remove-optnone-noinline -S | FileCheck %s

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"
Expand All @@ -9,12 +9,21 @@ define dso_local spir_func void @esimd() #0 !sycl_explicit_simd !0 {
ret void
}

define dso_local spir_func void @esimd_simt() #1 !sycl_explicit_simd !0 {
; CHECK: void @esimd_simt() #[[#ESIMDSIMTAttr:]]
ret void
}

define dso_local spir_func void @sycl() #0 {
; CHECK: spir_func void @sycl() #[[#SYCLAttr:]]
ret void
}
; CHECK: attributes #[[#ESIMDAttr]] =
; CHECK-NOT: optnone
; CHECK: attributes #[[#ESIMDSIMTAttr]] = {{{.*}}optnone
; CHECK-SAME: "CMGenxSIMT"
; CHECK-SAME: }
; CHECK: attributes #[[#SYCLAttr]] = {{{.*}}optnone{{.*}}}
attributes #0 = { noinline optnone convergent }
attributes #1 = { noinline optnone convergent "CMGenxSIMT"}
!0 = !{}
32 changes: 32 additions & 0 deletions llvm/test/tools/sycl-post-link/sycl-esimd/optnone.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; This ensures we remove optnone from ESIMD functions unless they are SIMT or we didn't split ESIMD code out.
; RUN: sycl-post-link -split-esimd -lower-esimd -S < %s -o %t.table
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK,CHECK-ESIMD-SPLIT

; RUN: sycl-post-link -lower-esimd -S < %s -o %t1.table
; RUN: FileCheck %s -input-file=%t1_esimd_0.ll --check-prefixes CHECK,CHECK-NO-ESIMD-SPLIT
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"

define dso_local spir_func void @esimd() #0 !sycl_explicit_simd !0 {
; CHECK: void @esimd() #[[#ESIMDAttr:]]
ret void
}

define dso_local spir_func void @esimd_simt() #1 !sycl_explicit_simd !0 {
; CHECK: void @esimd_simt() #[[#ESIMDSIMTAttr:]]
ret void
}

define dso_local spir_func void @sycl() #0 {
; CHECK: spir_func void @sycl() #[[#ESIMDAttr]]
ret void
}
; CHECK-ESIMD-SPLIT: attributes #[[#ESIMDAttr]] =
; CHECK-ESIMD-SPLIT-NOT: optnone
; CHECK-NO-ESIMD-SPLIT: attributes #[[#ESIMDAttr]] = {{{.*}}optnone{{.*}}}
; CHECK: attributes #[[#ESIMDSIMTAttr]] = {{{.*}}optnone
; CHECK-SAME: "CMGenxSIMT"
; CHECK-SAME: }
attributes #0 = { noinline optnone convergent }
attributes #1 = { noinline optnone convergent "CMGenxSIMT"}
!0 = !{}
2 changes: 1 addition & 1 deletion llvm/tools/sycl-post-link/sycl-post-link.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,7 @@ bool lowerEsimdConstructs(module_split::ModuleDesc &MD) {
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

ModulePassManager MPM;
MPM.addPass(SYCLLowerESIMDPass{});
MPM.addPass(SYCLLowerESIMDPass(!SplitEsimd));

if (!OptLevelO0) {
FunctionPassManager FPM;
Expand Down
25 changes: 0 additions & 25 deletions sycl/test-e2e/ESIMD/slm_alloc_many_kernels_many_funcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,6 @@
// Y Z
//
// SLM offsets are expected to be:
// 1) no inlining case:
// --- Kernel0
// X - N1
// Y - N1 + X
// Z - max(N1 + X, N2)
// --- Kernel2
// X - 0 (not reachable, offset not updated in the result)
// Y - 0 (not reachable, offset not updated in the result)
// Z - max(N1 + X, N2)
// 2) forced inlining case:
// --- Kernel0
// X - N1
// Y - N1 + X
Expand All @@ -45,9 +35,6 @@
// X - 0 (not reachable, offset not updated in the result)
// Y - 0 (not reachable, offset not updated in the result)
// Z - N2
// Note the difference in SLM offset for Z in the inlining/no-inlining cases for
// Z scope. This is because inlining effectively splits Z scope when inlining
// into kernel0 and kernel2

#include <sycl/ext/intel/esimd.hpp>
#include <sycl/sycl.hpp>
Expand Down Expand Up @@ -157,26 +144,14 @@ int main(void) {
// Z - max(N1 + X, N2)
gold_arr0[x_off_ind] = SLM_N1;
gold_arr0[y_off_ind] = SLM_N1 + SLM_X;

// For kernel0 inline/no-inline case splits for Z:
#ifdef FORCE_INLINE
gold_arr0[z_off_ind] = SLM_N1;
#else
gold_arr0[z_off_ind] = std::max(SLM_N1 + SLM_X, SLM_N2);
#endif // FORCE_INLINE

// For kernel1 inline/no-inline results are the same for X and Y:
// X - 0
// Y - 0
gold_arr1[x_off_ind] = 0;
gold_arr1[y_off_ind] = 0;

// For kernel1 inline/no-inline case splits for Z:
#ifdef FORCE_INLINE
gold_arr1[z_off_ind] = SLM_N2;
#else
gold_arr1[z_off_ind] = std::max(SLM_N1 + SLM_X, SLM_N2);
#endif // FORCE_INLINE

T *test_arr = arr;
int err_cnt = 0;
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/esimd/genx_func_attr.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clangxx -O2 -fsycl -fsycl-device-only -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %t -o %t.table
// RUN: sycl-post-link -lower-esimd -O2 -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll

// Checks ESIMD intrinsic translation.
Expand Down
7 changes: 4 additions & 3 deletions sycl/test/esimd/intrins_trans.cpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
// RUN: %clangxx %clang_O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL

// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
// RUN: %clangxx %clang_O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=true -O0 -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS

// Checks ESIMD intrinsic translation with opaque pointers.
// NOTE: must be run in -O0, as optimizer optimizes away some of the code
// NOTE: must be run with %clang_O0, as optimizer optimizes away some of the
// code

#include <sycl/detail/image_ocl_types.hpp>
#include <sycl/ext/intel/esimd.hpp>
Expand Down
3 changes: 3 additions & 0 deletions sycl/test/esimd/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import platform

config.substitutions.append(("%clang_O0", "-O0 -mllvm -esimd-allow-optnone-noinline"))

0 comments on commit 83e79ed

Please sign in to comment.