diff --git a/IGC/AdaptorOCL/dllInterfaceCompute.cpp b/IGC/AdaptorOCL/dllInterfaceCompute.cpp
index 194d381b9480..c4bd447e82f2 100644
--- a/IGC/AdaptorOCL/dllInterfaceCompute.cpp
+++ b/IGC/AdaptorOCL/dllInterfaceCompute.cpp
@@ -1219,6 +1219,29 @@ bool TranslateBuildSPMD(
             }
         }
 
+        // From pass IndVarSimplify we are only interested in optimization done by -replexitval.
+        // Disable other features that can have a negative impact on performance.
+        std::array<llvm::StringRef, 4> indVarSimplifyFlags = {
+            "-indvars-post-increment-ranges=0",
+            "-disable-lftr=1",
+            "-indvars-widen-indvars=0",
+            "-verify-indvars=0"
+        };
+        for (auto indVarSimplifyFlag : indVarSimplifyFlags)
+        {
+            // Look the option up by its bare name: drop the leading '-' and everything from '=' onward.
+            auto indVarSimplifySwitch = optionsMap.find(indVarSimplifyFlag.drop_front(1).split("=").first);
+            if (indVarSimplifySwitch != optionsMap.end())
+            {
+                // Only inject our default when this option has no recorded occurrences yet.
+                // data() is usable as a C string here because every flag above is a string literal.
+                if (indVarSimplifySwitch->getValue()->getNumOccurrences() == 0)
+                {
+                    args.push_back(indVarSimplifyFlag.data());
+                }
+            }
+        }
+
         if (std::size(args) > 1)
         {
             llvm::cl::ParseCommandLineOptions(std::size(args), &args[0]);
diff --git a/IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp b/IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp
index 1a5f4dec17e9..4dcfd80ce4fd 100644
--- a/IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp
+++ b/IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp
@@ -1525,6 +1525,11 @@ void OptimizeIR(CodeGenContext* const pContext)
 
         mpm.add(createIGCInstructionCombiningPass());
 
+        if (IGC_IS_FLAG_ENABLED(EnableIndVarSimplification))
+        {
+            mpm.add(llvm::createIndVarSimplifyPass());
+        }
+
         if (IGC_IS_FLAG_ENABLED(EnableLoopHoistConstant))
         {
             mpm.add(createLoopHoistConstant());
diff --git a/IGC/common/igc_flags.h b/IGC/common/igc_flags.h
index 7c5a5d332e5d..5dce40bba224 100644
--- a/IGC/common/igc_flags.h
+++ b/IGC/common/igc_flags.h
@@ -209,6 +209,7 @@ DECLARE_IGC_REGKEY(bool, DisableIRVerification, false, "Setting this to
 DECLARE_IGC_REGKEY(bool, EnableJumpThreading, true, "Setting this to 1/true adds a compiler switch to enable llvm jumpThreading pass.", true)
 DECLARE_IGC_REGKEY(bool, DisableLoopUnroll, false, "Setting this to 1/true adds a compiler switch to disable loop unrolling.", true)
 DECLARE_IGC_REGKEY(DWORD, RuntimeLoopUnrolling, 0, "Setting this to switch on/off runtime loop unrolling. 0: default (on), 1: force on, 2: force off", false)
+DECLARE_IGC_REGKEY(bool, EnableIndVarSimplification, true, "Enables IndVarSimplification pass.", true)
 DECLARE_IGC_REGKEY(bool, DisableBranchSwaping, false, "Setting this to 1/true adds a compiler switch to disable branch swapping.", false)
 DECLARE_IGC_REGKEY(bool, DisableSynchronizationObjectCoalescingPass, false, "Disable SynchronizationObjectCoalescing pass", false)
 DECLARE_IGC_REGKEY(bool, EnableIndependentSharedMemoryFenceFunctionality, false, "Enable treating global memory fences as shared memory fences in SynchronizationObjectCoalescing pass", false)
diff --git a/IGC/ocloc_tests/optimizations/IndVarSimplification.cl b/IGC/ocloc_tests/optimizations/IndVarSimplification.cl
new file mode 100644
index 000000000000..e471e92c3b27
--- /dev/null
+++ b/IGC/ocloc_tests/optimizations/IndVarSimplification.cl
@@ -0,0 +1,27 @@
+/*========================== begin_copyright_notice ============================
+
+Copyright (C) 2024 Intel Corporation
+
+SPDX-License-Identifier: MIT
+
+============================= end_copyright_notice ===========================*/
+
+// REQUIRES: regkeys,pvc-supported,llvm-14-plus
+
+// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'PrintToConsole=1,PrintAfter=EmitPass'" 2>&1 | FileCheck %s
+
+// CHECK-NOT: phi
+// CHECK: [[LOAD:%.*]] = load i32, i32 addrspace(1)* [[PTR:%.*]]
+// CHECK: [[ADD:%.*]] = add i32 [[LOAD]], 10000
+// CHECK: store i32 [[ADD]], i32 addrspace(1)* [[PTR]]
+
+kernel void test(global int* ptr)
+{
+    size_t id = get_global_id(0);
+    int val = ptr[id];
+    for (int i = 0; i < 10000; i++)
+    {
+        val += 1;
+    }
+    ptr[id] = val;
+}