From 48516b6dd84a8a9350a1c5dbd363dded1891d115 Mon Sep 17 00:00:00 2001 From: Christian Hagedorn Date: Fri, 24 Nov 2023 18:58:56 +0100 Subject: [PATCH] Refactor Loop Unswitching --- src/hotspot/share/opto/loopUnswitch.cpp | 265 ++++++++++++++---------- src/hotspot/share/opto/loopnode.hpp | 12 +- 2 files changed, 166 insertions(+), 111 deletions(-) diff --git a/src/hotspot/share/opto/loopUnswitch.cpp b/src/hotspot/share/opto/loopUnswitch.cpp index 98725cb5fe6ea..fde1d2ad3526d 100644 --- a/src/hotspot/share/opto/loopUnswitch.cpp +++ b/src/hotspot/share/opto/loopUnswitch.cpp @@ -83,13 +83,13 @@ bool IdealLoopTree::policy_unswitching( PhaseIdealLoop *phase ) const { return phase->may_require_nodes(est_loop_clone_sz(2)); } -//------------------------------find_unswitching_candidate----------------------------- -// Find candidate "if" for unswitching +// Find invariant test in loop body that does not exit the loop. If multiple are found, we pick the first one in the +// loop body. Return the candidate "if" for unswitching. IfNode* PhaseIdealLoop::find_unswitching_candidate(const IdealLoopTree *loop) const { // Find first invariant test that doesn't exit the loop LoopNode *head = loop->_head->as_Loop(); - IfNode* unswitch_iff = nullptr; + IfNode* unswitching_candidate = nullptr; Node* n = head->in(LoopNode::LoopBackControl); while (n != head) { Node* n_dom = idom(n); @@ -102,7 +102,7 @@ IfNode* PhaseIdealLoop::find_unswitching_candidate(const IdealLoopTree *loop) co // If condition is invariant and not a loop exit, // then found reason to unswitch. if (loop->is_invariant(bol) && !loop->is_loop_exit(iff)) { - unswitch_iff = iff; + unswitching_candidate = iff; } } } @@ -110,23 +110,21 @@ IfNode* PhaseIdealLoop::find_unswitching_candidate(const IdealLoopTree *loop) co } n = n_dom; } - return unswitch_iff; + return unswitching_candidate; } -//------------------------------do_unswitching----------------------------- -// Clone loop with an invariant test (that does not exit) and -// insert a clone of the test that selects which version to -// execute. -void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) { - LoopNode* head = loop->_head->as_Loop(); - if (has_control_dependencies_from_predicates(head)) { - return; - } - - // Find first invariant test that doesn't exit the loop - IfNode* unswitch_iff = find_unswitching_candidate((const IdealLoopTree *)loop); - assert(unswitch_iff != nullptr, "should be at least one"); +// Perform Loop Unswitching on the loop containing an invariant test that does not exit the loop. The loop is cloned +// such that we have two identical loops next to each other - a fast and a slow loop. We modify the loops as follows: +// - Fast loop: We remove the invariant test together with the false path and only leave the true path in the loop. +// - Slow loop: We remove the invariant test together with the true path and only leave the false path in the loop. +// +// We insert a new If node before both loops that performs the removed invariant test. If the test is true at runtime, +// we select the fast loop. Otherwise, we select the slow loop. +void PhaseIdealLoop::do_unswitching(IdealLoopTree* loop, Node_List& old_new) { + IfNode* unswitching_candidate = find_unswitching_candidate(loop); + assert(unswitching_candidate != nullptr, "should be at least one"); + LoopNode* head = loop->_head->as_Loop(); #ifndef PRODUCT if (TraceLoopOpts) { tty->print("Unswitch %d ", head->unswitch_count()+1); @@ -139,9 +137,8 @@ void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) { head->as_CountedLoop()->set_normal_loop(); } - IfNode* invar_iff = create_slow_version_of_loop(loop, old_new, unswitch_iff, CloneIncludesStripMined); - ProjNode* proj_true = invar_iff->proj_out(1); - verify_fast_loop(head, proj_true); + IfNode* loop_selector_if = create_slow_version_of_loop(loop, old_new, unswitching_candidate); + IfTrueNode* loop_selector_fast_loop_proj = loop_selector_if->proj_out(1)->as_IfTrue(); // Increment unswitch count LoopNode* head_clone = old_new[head->_idx]->as_Loop(); @@ -153,8 +150,8 @@ void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) { // control projection. Node_List worklist; - for (DUIterator_Fast imax, i = unswitch_iff->fast_outs(imax); i < imax; i++) { - ProjNode* proj= unswitch_iff->fast_out(i)->as_Proj(); + for (DUIterator_Fast imax, i = unswitching_candidate->fast_outs(imax); i < imax; i++) { + IfProjNode* proj = unswitching_candidate->fast_out(i)->as_IfProj(); // Copy to a worklist for easier manipulation for (DUIterator_Fast jmax, j = proj->fast_outs(jmax); j < jmax; j++) { Node* use = proj->fast_out(j); @@ -162,13 +159,13 @@ void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) { worklist.push(use); } } - ProjNode* invar_proj = invar_iff->proj_out(proj->_con)->as_Proj(); + IfProjNode* loop_selector_if_proj = loop_selector_if->proj_out(proj->_con)->as_IfProj(); while (worklist.size() > 0) { Node* use = worklist.pop(); Node* nuse = use->clone(); - nuse->set_req(0, invar_proj); + nuse->set_req(0, loop_selector_if_proj); _igvn.replace_input_of(use, 1, nuse); - register_new_node(nuse, invar_proj); + register_new_node(nuse, loop_selector_if_proj); // Same for the clone Node* use_clone = old_new[use->_idx]; _igvn.replace_input_of(use_clone, 1, nuse); @@ -176,13 +173,13 @@ void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) { } // Hardwire the control paths in the loops into if(true) and if(false) - _igvn.rehash_node_delayed(unswitch_iff); - dominated_by(proj_true->as_IfProj(), unswitch_iff, false, false); + _igvn.rehash_node_delayed(unswitching_candidate); + dominated_by(loop_selector_fast_loop_proj->as_IfProj(), unswitching_candidate, false, false); - IfNode* unswitch_iff_clone = old_new[unswitch_iff->_idx]->as_If(); - _igvn.rehash_node_delayed(unswitch_iff_clone); - ProjNode* proj_false = invar_iff->proj_out(0); - dominated_by(proj_false->as_IfProj(), unswitch_iff_clone, false, false); + IfNode* unswitching_candidate_clone = old_new[unswitching_candidate->_idx]->as_If(); + _igvn.rehash_node_delayed(unswitching_candidate_clone); + IfFalseNode* loop_selector_slow_loop_proj = loop_selector_if->proj_out(0)->as_IfFalse(); + dominated_by(loop_selector_slow_loop_proj, unswitching_candidate_clone, false, false); // Reoptimize loops loop->record_for_igvn(); @@ -195,92 +192,146 @@ void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) { #ifndef PRODUCT if (TraceLoopUnswitching) { tty->print_cr("Loop unswitching orig: %d @ %d new: %d @ %d", - head->_idx, unswitch_iff->_idx, - old_new[head->_idx]->_idx, unswitch_iff_clone->_idx); + head->_idx, unswitching_candidate->_idx, + old_new[head->_idx]->_idx, unswitching_candidate_clone->_idx); } #endif C->set_major_progress(); } -bool PhaseIdealLoop::has_control_dependencies_from_predicates(LoopNode* head) const { - Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl); - Predicates predicates(entry); - if (predicates.has_any()) { - assert(entry->is_IfProj(), "sanity - must be ifProj since there is at least one predicate"); - if (entry->outcnt() > 1) { - // Bailout if there are predicates from which there are additional control dependencies (i.e. from loop - // entry 'entry') to previously partially peeled statements since this case is not handled and can lead - // to a wrong execution. Remove this bailout, once this is fixed. - return true; - } + +// Class to create an If node that selects if the fast or the slow loop should be executed at runtime. +class UnswitchedLoopSelector { + PhaseIdealLoop* _phase; + IdealLoopTree* _outer_loop; + Node* _original_loop_entry; + uint _dom_depth; + IfNode* _selector_if; + IfTrueNode* _fast_loop_proj; + IfFalseNode* _slow_loop_proj; + + IfNode* create_selector_if(IdealLoopTree* loop, IfNode* unswitch_if_candidate) { + const uint dom_depth = _phase->dom_depth(_original_loop_entry); + _phase->igvn().rehash_node_delayed(_original_loop_entry); + Node* unswitching_candidate_bool = unswitch_if_candidate->in(1)->as_Bool(); + IfNode* selector_if = + (unswitch_if_candidate->Opcode() == Op_RangeCheck) ? + new RangeCheckNode(_original_loop_entry, unswitching_candidate_bool, unswitch_if_candidate->_prob, + unswitch_if_candidate->_fcnt) : + new IfNode(_original_loop_entry, unswitching_candidate_bool, unswitch_if_candidate->_prob, + unswitch_if_candidate->_fcnt); + _phase->register_node(selector_if, _outer_loop, _original_loop_entry, dom_depth); + return selector_if; } - return false; -} -//-------------------------create_slow_version_of_loop------------------------ -// Create a slow version of the loop by cloning the loop -// and inserting an if to select fast-slow versions. -// Return the inserted if. -IfNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop, - Node_List &old_new, - IfNode* unswitch_iff, - CloneLoopMode mode) { - LoopNode* head = loop->_head->as_Loop(); - Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl); - _igvn.rehash_node_delayed(entry); - IdealLoopTree* outer_loop = loop->_parent; - - head->verify_strip_mined(1); - - // Add test to new "if" outside of loop - Node *bol = unswitch_iff->in(1)->as_Bool(); - IfNode* iff = (unswitch_iff->Opcode() == Op_RangeCheck) ? new RangeCheckNode(entry, bol, unswitch_iff->_prob, unswitch_iff->_fcnt) : - new IfNode(entry, bol, unswitch_iff->_prob, unswitch_iff->_fcnt); - register_node(iff, outer_loop, entry, dom_depth(entry)); - IfProjNode* iffast = new IfTrueNode(iff); - register_node(iffast, outer_loop, iff, dom_depth(iff)); - IfProjNode* ifslow = new IfFalseNode(iff); - register_node(ifslow, outer_loop, iff, dom_depth(iff)); - - // Clone the loop body. The clone becomes the slow loop. The - // original pre-header will (illegally) have 3 control users - // (old & new loops & new if). - clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), mode, iff); - assert(old_new[head->_idx]->is_Loop(), "" ); - - // Fast (true) and Slow (false) control - IfProjNode* iffast_pred = iffast; - IfProjNode* ifslow_pred = ifslow; - clone_parse_and_assertion_predicates_to_unswitched_loop(loop, old_new, iffast_pred, ifslow_pred); - - Node* l = head->skip_strip_mined(); - _igvn.replace_input_of(l, LoopNode::EntryControl, iffast_pred); - set_idom(l, iffast_pred, dom_depth(l)); - LoopNode* slow_l = old_new[head->_idx]->as_Loop()->skip_strip_mined(); - _igvn.replace_input_of(slow_l, LoopNode::EntryControl, ifslow_pred); - set_idom(slow_l, ifslow_pred, dom_depth(l)); + IfTrueNode* create_fast_loop_proj() { + IfTrueNode* fast_loop_proj = new IfTrueNode(_selector_if); + _phase->register_node(fast_loop_proj, _outer_loop, _selector_if, _dom_depth); + return fast_loop_proj; + } - recompute_dom_depth(); + IfFalseNode* create_slow_loop_proj() { + IfFalseNode* slow_loop_proj = new IfFalseNode(_selector_if); + _phase->register_node(slow_loop_proj, _outer_loop, _selector_if, _dom_depth); + return slow_loop_proj; + } - return iff; -} + public: + UnswitchedLoopSelector(IfNode* unswitch_if_candidate, IdealLoopTree* loop) + : _phase(loop->_phase), + _outer_loop(loop->_head->as_Loop()->is_strip_mined() ? loop->_parent->_parent : loop->_parent), + _original_loop_entry(loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl)), + _dom_depth(_phase->dom_depth(_original_loop_entry)), + _selector_if(create_selector_if(loop, unswitch_if_candidate)), + _fast_loop_proj(create_fast_loop_proj()), + _slow_loop_proj(create_slow_loop_proj()) {} + + uint dom_depth() const { + return _dom_depth; + } + + Node* entry() const { + return _selector_if->in(0); + } + + IfNode* selector_if() const { + return _selector_if; + } + + IfTrueNode* fast_loop_proj() const { + return _fast_loop_proj; + } + + IfFalseNode* slow_loop_proj() const { + return _slow_loop_proj; + } +}; + +// Class to unswitch a loop and create predicates at the new loops. The newly cloned loop becomes the slow loop while +// the original loop becomes the fast loop. +class OriginalLoop { + LoopNode* _loop_head; // The original loop becomes the fast loop. + LoopNode* _strip_mined_loop_head; + IdealLoopTree* _loop; + Node_List* _old_new; + PhaseIdealLoop* _phase; + + public: + OriginalLoop(IdealLoopTree* loop, Node_List* old_new) + : _loop_head(loop->_head->as_Loop()), + _strip_mined_loop_head(_loop_head->skip_strip_mined()), + _loop(loop), + _old_new(old_new), + _phase(loop->_phase) {} + + + // Unswitch on the invariant unswitching candidate If. Return the new if which switches between the slow and fast loop. + IfNode* unswitch(IfNode* unswitching_candidate) { + UnswitchedLoopSelector unswitched_loop_selector(unswitching_candidate, _loop); + IfNode* loop_selector_if = unswitched_loop_selector.selector_if(); + const uint first_slow_loop_node_index = _phase->C->unique(); + _phase->clone_loop(_loop, *_old_new, _phase->dom_depth(_loop_head), + PhaseIdealLoop::CloneIncludesStripMined, loop_selector_if); + // Fast (true) and Slow (false) control + IfProjNode* iffast_pred = unswitched_loop_selector.fast_loop_proj(); + IfProjNode* ifslow_pred = unswitched_loop_selector.slow_loop_proj(); + _phase->clone_parse_and_assertion_predicates_to_unswitched_loop(_loop, *_old_new, iffast_pred, ifslow_pred); + + fix_loop_entries(iffast_pred, ifslow_pred); + + DEBUG_ONLY(verify_unswitched_loops(_loop_head, unswitched_loop_selector, _old_new);) + return loop_selector_if; + } + + void fix_loop_entries(IfProjNode* iffast_pred, IfProjNode* ifslow_pred) { + _phase->replace_loop_entry(_strip_mined_loop_head, iffast_pred); + LoopNode* slow_loop_strip_mined_head = _old_new->at(_strip_mined_loop_head->_idx)->as_Loop(); + _phase->replace_loop_entry(slow_loop_strip_mined_head, ifslow_pred); + } #ifdef ASSERT -void PhaseIdealLoop::verify_fast_loop(LoopNode* head, const ProjNode* proj_true) const { - assert(proj_true->is_IfTrue(), "must be true projection"); - Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl); - Predicates predicates(entry); - if (!predicates.has_any()) { - // No Parse Predicate. - Node* uniqc = proj_true->unique_ctrl_out(); - assert((uniqc == head && !head->is_strip_mined()) || (uniqc == head->in(LoopNode::EntryControl) - && head->is_strip_mined()), "must hold by construction if no predicates"); - } else { - // There is at least one Parse Predicate. When skipping all predicates/predicate blocks, we should end up - // at 'proj_true'. - assert(proj_true == predicates.entry(), "must hold by construction if at least one Parse Predicate"); + static void verify_unswitched_loops(LoopNode* fast_loop_head, UnswitchedLoopSelector& unswitched_loop_selector, + Node_List* old_new) { + verify_unswitched_loop(fast_loop_head, unswitched_loop_selector.fast_loop_proj()); + verify_unswitched_loop(old_new->at(fast_loop_head->_idx)->as_Loop(), unswitched_loop_selector.slow_loop_proj()); } -} -#endif // ASSERT + static void verify_unswitched_loop(LoopNode* loop_head, IfProjNode* loop_selector_if_proj) { + Node* entry = loop_head->skip_strip_mined()->in(LoopNode::EntryControl); + Predicates predicates(entry); + // When skipping all predicates, we should end up at 'loop_selector_if_proj'. + assert(loop_selector_if_proj == predicates.entry(), "should end up at selector if"); + } +#endif // ASSERT +}; + +// Create a slow version of the loop by cloning the loop and inserting an If to select the fast or slow version. +// Return the inserted loop selector If. +IfNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree* loop, Node_List& old_new, + IfNode* unswitching_candidate) { + OriginalLoop original_loop(loop, &old_new); + IfNode* loop_selector_if = original_loop.unswitch(unswitching_candidate); + recompute_dom_depth(); + return loop_selector_if; +} diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp index d095b1f6aadaa..c91ed6ae9e3b0 100644 --- a/src/hotspot/share/opto/loopnode.hpp +++ b/src/hotspot/share/opto/loopnode.hpp @@ -1347,6 +1347,11 @@ class PhaseIdealLoop : public PhaseTransform { public: void register_control(Node* n, IdealLoopTree *loop, Node* pred, bool update_body = true); + void replace_loop_entry(LoopNode* loop_head, Node* new_entry) { + _igvn.replace_input_of(loop_head, LoopNode::EntryControl, new_entry); + set_idom(loop_head, new_entry, dom_depth(new_entry)); + } + // Construct a range check for a predicate if BoolNode* rc_predicate(IdealLoopTree* loop, Node* ctrl, int scale, Node* offset, Node* init, Node* limit, jint stride, Node* range, bool upper, bool& overflow); @@ -1405,10 +1410,7 @@ class PhaseIdealLoop : public PhaseTransform { // Create a slow version of the loop by cloning the loop // and inserting an if to select fast-slow versions. // Return the inserted if. - IfNode* create_slow_version_of_loop(IdealLoopTree *loop, - Node_List &old_new, - IfNode* unswitch_iff, - CloneLoopMode mode); + IfNode* create_slow_version_of_loop(IdealLoopTree* loop, Node_List& old_new, IfNode* unswitch_iff); // Clone loop with an invariant test (that does not exit) and // insert a clone of the test that selects which version to @@ -1615,9 +1617,11 @@ class PhaseIdealLoop : public PhaseTransform { _nodes_required = UINT_MAX; } + public: // Clone Parse Predicates to slow and fast loop when unswitching a loop void clone_parse_and_assertion_predicates_to_unswitched_loop(IdealLoopTree* loop, Node_List& old_new, IfProjNode*& iffast_pred, IfProjNode*& ifslow_pred); + private: void clone_loop_predication_predicates_to_unswitched_loop(IdealLoopTree* loop, const Node_List& old_new, const PredicateBlock* predicate_block, Deoptimization::DeoptReason reason, IfProjNode*& iffast_pred,