From 28ad061c459566f5dd9aa405628549d554a36939 Mon Sep 17 00:00:00 2001 From: Gang Y Chen Date: Thu, 15 Jun 2023 23:55:03 +0000 Subject: [PATCH] uniform analysis cross-loop def-use random add another condition for def-use affected by divergent-branch --- IGC/AdaptorCommon/DivergentBarrierPass.cpp | 4 +- IGC/Compiler/CISACodeGen/VectorPreProcess.cpp | 5 +- IGC/Compiler/CISACodeGen/WIAnalysis.cpp | 48 +++++++++++++++++-- IGC/Compiler/CISACodeGen/WIAnalysis.hpp | 16 +++---- 4 files changed, 56 insertions(+), 17 deletions(-) diff --git a/IGC/AdaptorCommon/DivergentBarrierPass.cpp b/IGC/AdaptorCommon/DivergentBarrierPass.cpp index 94dd90c417c2..21d99c4198cd 100644 --- a/IGC/AdaptorCommon/DivergentBarrierPass.cpp +++ b/IGC/AdaptorCommon/DivergentBarrierPass.cpp @@ -29,6 +29,7 @@ SPDX-License-Identifier: MIT #include "llvmWrapper/IR/DerivedTypes.h" #include "llvm/IR/Module.h" #include "llvm/IR/InstIterator.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Verifier.h" @@ -96,10 +97,11 @@ bool DivergentBarrierPass::hasDivergentBarrier( PostDominatorTree PDT(*F); DominatorTree DT(*F); + LoopInfo LI(DT); TranslationTable TT; TT.run(*F); - WIAnalysisRunner WI(F, &DT, &PDT, m_MDUtils, m_CGCtx, ModMD, &TT); + WIAnalysisRunner WI(F, &LI, &DT, &PDT, m_MDUtils, m_CGCtx, ModMD, &TT); WI.run(); return llvm::any_of(Barriers, [&](Instruction* I) { diff --git a/IGC/Compiler/CISACodeGen/VectorPreProcess.cpp b/IGC/Compiler/CISACodeGen/VectorPreProcess.cpp index ff909e6fb6e1..4143f551fba6 100644 --- a/IGC/Compiler/CISACodeGen/VectorPreProcess.cpp +++ b/IGC/Compiler/CISACodeGen/VectorPreProcess.cpp @@ -357,6 +357,7 @@ namespace AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } private: @@ -1809,12 +1810,14 @@ bool VectorPreProcess::runOnFunction(Function& F) &getAnalysis().getDomTree(); auto* PDT = &getAnalysis().getPostDomTree(); + auto *LI = + &getAnalysis().getLoopInfo(); auto* 
ModMD = getAnalysis().getModuleMetaData(); TranslationTable TT; TT.run(F); - WIAnalysisRunner WI(&F, DT, PDT, MDUtils, m_CGCtx, ModMD, &TT); + WIAnalysisRunner WI(&F, LI, DT, PDT, MDUtils, m_CGCtx, ModMD, &TT); WI.run(); for (uint32_t i = 0; i < m_WorkList.size(); ++i) diff --git a/IGC/Compiler/CISACodeGen/WIAnalysis.cpp b/IGC/Compiler/CISACodeGen/WIAnalysis.cpp index 69cad7005e87..9d8eeab1a62b 100644 --- a/IGC/Compiler/CISACodeGen/WIAnalysis.cpp +++ b/IGC/Compiler/CISACodeGen/WIAnalysis.cpp @@ -274,6 +274,7 @@ void WIAnalysisRunner::dump() const void WIAnalysisRunner::init( llvm::Function* F, + llvm::LoopInfo* LI, llvm::DominatorTree* DT, llvm::PostDominatorTree* PDT, IGC::IGCMD::MetaDataUtils* MDUtils, @@ -282,6 +283,7 @@ void WIAnalysisRunner::init( IGC::TranslationTable* TransTable) { m_func = F; + this->LI = LI; this->DT = DT; this->PDT = PDT; m_pMdUtils = MDUtils; @@ -381,11 +383,12 @@ bool WIAnalysis::runOnFunction(Function& F) auto* MDUtils = getAnalysis().getMetaDataUtils(); auto* DT = &getAnalysis().getDomTree(); auto* PDT = &getAnalysis().getPostDomTree(); - auto* CGCtx = getAnalysis().getCodeGenContext(); + auto *LI = &getAnalysis().getLoopInfo(); + auto *CGCtx = getAnalysis().getCodeGenContext(); auto* ModMD = getAnalysis().getModuleMetaData(); auto* pTT = &getAnalysis(); - Runner.init(&F, DT, PDT, MDUtils, CGCtx, ModMD, pTT); + Runner.init(&F, LI, DT, PDT, MDUtils, CGCtx, ModMD, pTT); return Runner.run(); } @@ -911,17 +914,47 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst) BranchInfo br_info(inst, ipd); // debug: dump influence region and partial-joins // br_info.print(ods()); - + auto* CbrLoop = LI->getLoopFor(blk); + // Loop* IPDLoop = nullptr; // check dep-type for every phi in the full join if (ipd) { updatePHIDepAtJoin(ipd, &br_info); + // IPDLoop = LI->getLoopFor(ipd); } // check dep-type for every phi in the partial-joins for (SmallPtrSet::iterator join_it = br_info.partial_joins.begin(), join_e = 
br_info.partial_joins.end(); join_it != join_e; ++join_it) { - updatePHIDepAtJoin(*join_it, &br_info); + auto *PJ = *join_it; + // skip the special loop-entry case + if (DT->dominates(PJ, blk)) { + int NumPreds = 0; + for (auto *pred : predecessors(PJ)) { + if (br_info.influence_region.count(pred)) { + NumPreds++; + } + } + if (NumPreds <= 1) + continue; + } + auto PJDom = DT->getNode(PJ)->getIDom()->getBlock(); + // If both the partial-join and its IDom are in the partial-join region, + // there are cases in which phi-nodes in partial-joins are not + // relevant to the cbr under investigation + auto LoopA = LI->getLoopFor(PJDom); + //auto LoopB = LI->getLoopFor(PJ); + if (br_info.partial_joins.count(PJDom)) + { + // both PJ and its IDom are outside the CBR loop + if (!CbrLoop || !CbrLoop->contains(LoopA)) + continue; + // cbr and its IPD are at the same loop level + // the influence region can be considered as a DAG + // if (IPDLoop == CbrLoop) + // continue; + } + updatePHIDepAtJoin(PJ, &br_info); } // walk through all the instructions in the influence-region @@ -970,6 +1003,7 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst) // 1) if use is in the full-join // 2) if use is even outside the full-join // 3) if use is in partial-join but def is not in partial-join + // 4) if def and use are in partial-join but def is inside a loop that does not contain the use Value::use_iterator use_it = defi->use_begin(); Value::use_iterator use_e = defi->use_end(); for (; use_it != use_e; ++use_it) @@ -989,10 +1023,14 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst) // local def-use, not related to control-dependence continue; // skip } + auto DefLoop = LI->getLoopFor(def_blk); + auto UseLoop = LI->getLoopFor(user_blk); if (user_blk == br_info.full_join || !br_info.influence_region.count(user_blk) || (br_info.partial_joins.count(user_blk) && - !br_info.partial_joins.count(def_blk)) + (!br_info.partial_joins.count(def_blk) || + (DefLoop && 
!DefLoop->contains(UseLoop))) + ) ) { updateDepMap(defi, instDep); diff --git a/IGC/Compiler/CISACodeGen/WIAnalysis.hpp b/IGC/Compiler/CISACodeGen/WIAnalysis.hpp index 8acc1d58b68c..540693e9f141 100644 --- a/IGC/Compiler/CISACodeGen/WIAnalysis.hpp +++ b/IGC/Compiler/CISACodeGen/WIAnalysis.hpp @@ -27,6 +27,7 @@ SPDX-License-Identifier: MIT #include #include #include +#include #include #include "common/LLVMWarningsPop.hpp" @@ -78,6 +79,7 @@ namespace IGC public: void init( llvm::Function* F, + llvm::LoopInfo* LI, llvm::DominatorTree* DT, llvm::PostDominatorTree* PDT, IGCMD::MetaDataUtils* MDUtils, @@ -87,6 +89,7 @@ namespace IGC WIAnalysisRunner( llvm::Function* F, + llvm::LoopInfo* LI, llvm::DominatorTree* DT, llvm::PostDominatorTree* PDT, IGCMD::MetaDataUtils* MDUtils, @@ -94,7 +97,7 @@ namespace IGC ModuleMetaData* ModMD, TranslationTable* TransTable) { - init(F, DT, PDT, MDUtils, CGCtx, ModMD, TransTable); + init(F, LI, DT, PDT, MDUtils, CGCtx, ModMD, TransTable); } WIAnalysisRunner() {} @@ -247,12 +250,6 @@ namespace IGC bool& IsLzUniform); private: -#ifdef OCL_SPECIFIC - // @brief pointer to Soa alloca analysis performed for this function - SoaAllocaAnalysis* m_soaAllocaAnalysis = nullptr; - /// Runtime services pointer - RuntimeServices* m_rtServices = nullptr; -#endif /// The WIAnalysis follows pointer arithmetic /// and Index arithmetic when calculating dependency @@ -281,6 +278,7 @@ namespace IGC std::vector m_forcedUniforms; llvm::Function* m_func = nullptr; + llvm::LoopInfo *LI = nullptr; llvm::DominatorTree* DT = nullptr; llvm::PostDominatorTree* PDT = nullptr; IGC::IGCMD::MetaDataUtils* m_pMdUtils = nullptr; @@ -325,11 +323,9 @@ namespace IGC { // Analysis pass preserve all AU.setPreservesAll(); -#ifdef OCL_SPECIFIC - AU.addRequired(); -#endif AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired();