提交 ac8ab5ce 编写于 作者: K kvn

7004535: Clone loop predicate during loop unswitch

Summary: Clone loop predicate for cloned loops
Reviewed-by: never
上级 9d449b34
......@@ -1349,9 +1349,17 @@ static Node* is_absolute( PhaseGVN *phase, PhiNode *phi_root, int true_path) {
static void split_once(PhaseIterGVN *igvn, Node *phi, Node *val, Node *n, Node *newn) {
igvn->hash_delete(n); // Remove from hash before hacking edges
Node* predicate_proj = NULL;
uint j = 1;
for( uint i = phi->req()-1; i > 0; i-- ) {
if( phi->in(i) == val ) { // Found a path with val?
for (uint i = phi->req()-1; i > 0; i--) {
if (phi->in(i) == val) { // Found a path with val?
if (n->is_Region()) {
Node* proj = PhaseIdealLoop::find_predicate(n->in(i));
if (proj != NULL) {
assert(predicate_proj == NULL, "only one predicate entry expected");
predicate_proj = proj;
}
}
// Add to NEW Region/Phi, no DU info
newn->set_req( j++, n->in(i) );
// Remove from OLD Region/Phi
......@@ -1362,6 +1370,12 @@ static void split_once(PhaseIterGVN *igvn, Node *phi, Node *val, Node *n, Node *
// Register the new node but do not transform it. Cannot transform until the
// entire Region/Phi conglomerate has been hacked as a single huge transform.
igvn->register_new_node_with_optimizer( newn );
// Clone loop predicates
if (predicate_proj != NULL) {
newn = igvn->clone_loop_predicates(predicate_proj, newn);
}
// Now I can point to the new node.
n->add_req(newn);
igvn->_worklist.push(n);
......
......@@ -1632,7 +1632,6 @@ void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) {
igvn.replace_node(n, n->in(1));
}
assert(predicate_count()==0, "should be clean!");
igvn.optimize();
}
//------------------------------Optimize---------------------------------------
......@@ -1689,7 +1688,7 @@ void Compile::Optimize() {
if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
{
TracePhase t2("idealLoop", &_t_idealLoop, true);
PhaseIdealLoop ideal_loop( igvn, true, UseLoopPredicate);
PhaseIdealLoop ideal_loop( igvn, true );
loop_opts_cnt--;
if (major_progress()) print_method("PhaseIdealLoop 1", 2);
if (failing()) return;
......@@ -1697,7 +1696,7 @@ void Compile::Optimize() {
// Loop opts pass if partial peeling occurred in previous pass
if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
TracePhase t3("idealLoop", &_t_idealLoop, true);
PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate);
PhaseIdealLoop ideal_loop( igvn, false );
loop_opts_cnt--;
if (major_progress()) print_method("PhaseIdealLoop 2", 2);
if (failing()) return;
......@@ -1705,7 +1704,7 @@ void Compile::Optimize() {
// Loop opts pass for loop-unrolling before CCP
if(major_progress() && (loop_opts_cnt > 0)) {
TracePhase t4("idealLoop", &_t_idealLoop, true);
PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate);
PhaseIdealLoop ideal_loop( igvn, false );
loop_opts_cnt--;
if (major_progress()) print_method("PhaseIdealLoop 3", 2);
}
......@@ -1743,21 +1742,13 @@ void Compile::Optimize() {
// peeling, unrolling, etc.
if(loop_opts_cnt > 0) {
debug_only( int cnt = 0; );
bool loop_predication = UseLoopPredicate;
while(major_progress() && (loop_opts_cnt > 0)) {
TracePhase t2("idealLoop", &_t_idealLoop, true);
assert( cnt++ < 40, "infinite cycle in loop optimization" );
PhaseIdealLoop ideal_loop( igvn, true, loop_predication);
PhaseIdealLoop ideal_loop( igvn, true);
loop_opts_cnt--;
if (major_progress()) print_method("PhaseIdealLoop iterations", 2);
if (failing()) return;
// Perform loop predication optimization during first iteration after CCP.
// After that switch it off and cleanup unused loop predicates.
if (loop_predication) {
loop_predication = false;
cleanup_loop_predicates(igvn);
if (failing()) return;
}
}
}
......
......@@ -489,6 +489,9 @@ class Compile : public Phase {
// remove the opaque nodes that protect the predicates so that the unused checks and
// uncommon traps will be eliminated from the graph.
void cleanup_loop_predicates(PhaseIterGVN &igvn);
// Returns whether _predicate_opaqs contains the node n.
bool is_predicate_opaq(Node * n) {
  const bool tracked = _predicate_opaqs->contains(n);
  return tracked;
}
// Compilation environment.
Arena* comp_arena() { return &_comp_arena; }
......
......@@ -27,6 +27,7 @@
#include "opto/addnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
#include "opto/loopnode.hpp"
#include "opto/phaseX.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
......@@ -222,22 +223,35 @@ static Node* split_if(IfNode *iff, PhaseIterGVN *igvn) {
// Make a region merging constants and a region merging the rest
uint req_c = 0;
Node* predicate_proj = NULL;
for (uint ii = 1; ii < r->req(); ii++) {
if( phi->in(ii) == con1 ) {
if (phi->in(ii) == con1) {
req_c++;
}
Node* proj = PhaseIdealLoop::find_predicate(r->in(ii));
if (proj != NULL) {
assert(predicate_proj == NULL, "only one predicate entry expected");
predicate_proj = proj;
}
}
Node* predicate_c = NULL;
Node* predicate_x = NULL;
Node *region_c = new (igvn->C, req_c + 1) RegionNode(req_c + 1);
Node *phi_c = con1;
uint len = r->req();
Node *region_x = new (igvn->C, len - req_c + 1) RegionNode(len - req_c + 1);
Node *region_x = new (igvn->C, len - req_c) RegionNode(len - req_c);
Node *phi_x = PhiNode::make_blank(region_x, phi);
for (uint i = 1, i_c = 1, i_x = 1; i < len; i++) {
if( phi->in(i) == con1 ) {
if (phi->in(i) == con1) {
region_c->init_req( i_c++, r ->in(i) );
if (r->in(i) == predicate_proj)
predicate_c = predicate_proj;
} else {
region_x->init_req( i_x, r ->in(i) );
phi_x ->init_req( i_x++, phi->in(i) );
if (r->in(i) == predicate_proj)
predicate_x = predicate_proj;
}
}
......@@ -277,8 +291,20 @@ static Node* split_if(IfNode *iff, PhaseIterGVN *igvn) {
// Make the true/false arms
Node *iff_c_t = phase->transform(new (igvn->C, 1) IfTrueNode (iff_c));
Node *iff_c_f = phase->transform(new (igvn->C, 1) IfFalseNode(iff_c));
if (predicate_c != NULL) {
assert(predicate_x == NULL, "only one predicate entry expected");
// Clone loop predicates to each path
iff_c_t = igvn->clone_loop_predicates(predicate_c, iff_c_t);
iff_c_f = igvn->clone_loop_predicates(predicate_c, iff_c_f);
}
Node *iff_x_t = phase->transform(new (igvn->C, 1) IfTrueNode (iff_x));
Node *iff_x_f = phase->transform(new (igvn->C, 1) IfFalseNode(iff_x));
if (predicate_x != NULL) {
assert(predicate_c == NULL, "only one predicate entry expected");
// Clone loop predicates to each path
iff_x_t = igvn->clone_loop_predicates(predicate_x, iff_x_t);
iff_x_f = igvn->clone_loop_predicates(predicate_x, iff_x_f);
}
// Merge the TRUE paths
Node *region_s = new (igvn->C, 3) RegionNode(3);
......
此差异已折叠。
此差异已折叠。
......@@ -32,15 +32,17 @@
//
// orig: transformed:
// if (invariant-test) then
// predicate predicate
// loop loop
// stmt1 stmt1
// if (invariant-test) then stmt2
// stmt2 stmt4
// else endloop
// stmt3 else
// endif loop [clone]
// stmt4 stmt1 [clone]
// endloop stmt3
// endif predicate [clone]
// stmt4 loop [clone]
// endloop stmt1 [clone]
// stmt3
// stmt4 [clone]
// endloop
// endif
......@@ -124,8 +126,15 @@ void PhaseIdealLoop::do_unswitching (IdealLoopTree *loop, Node_List &old_new) {
ProjNode* proj_true = create_slow_version_of_loop(loop, old_new);
assert(proj_true->is_IfTrue() && proj_true->unique_ctrl_out() == head, "by construction");
#ifdef ASSERT
Node* uniqc = proj_true->unique_ctrl_out();
Node* entry = head->in(LoopNode::EntryControl);
Node* predicate = find_predicate(entry);
if (predicate != NULL) predicate = predicate->in(0);
assert(proj_true->is_IfTrue() &&
(predicate == NULL && uniqc == head ||
predicate != NULL && uniqc == predicate), "by construction");
#endif
// Increment unswitch count
LoopNode* head_clone = old_new[head->_idx]->as_Loop();
int nct = head->unswitch_count() + 1;
......@@ -227,21 +236,24 @@ ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
register_node(ifslow, outer_loop, iff, dom_depth(iff));
// Clone the loop body. The clone becomes the fast loop. The
// original pre-header will (illegally) have 2 control users (old & new loops).
// original pre-header will (illegally) have 3 control users
// (old & new loops & new if).
clone_loop(loop, old_new, dom_depth(head), iff);
assert(old_new[head->_idx]->is_Loop(), "" );
// Fast (true) control
Node* iffast_pred = clone_loop_predicates(entry, iffast);
_igvn.hash_delete(head);
head->set_req(LoopNode::EntryControl, iffast);
set_idom(head, iffast, dom_depth(head));
head->set_req(LoopNode::EntryControl, iffast_pred);
set_idom(head, iffast_pred, dom_depth(head));
_igvn._worklist.push(head);
// Slow (false) control
Node* ifslow_pred = move_loop_predicates(entry, ifslow);
LoopNode* slow_head = old_new[head->_idx]->as_Loop();
_igvn.hash_delete(slow_head);
slow_head->set_req(LoopNode::EntryControl, ifslow);
set_idom(slow_head, ifslow, dom_depth(slow_head));
slow_head->set_req(LoopNode::EntryControl, ifslow_pred);
set_idom(slow_head, ifslow_pred, dom_depth(slow_head));
_igvn._worklist.push(slow_head);
recompute_dom_depth();
......
......@@ -341,7 +341,12 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
//
assert(x->Opcode() == Op_Loop, "regular loops only");
C->print_method("Before CountedLoop", 3);
#ifndef PRODUCT
if (TraceLoopOpts) {
tty->print("Counted ");
loop->dump_head();
}
#endif
// If compare points to incr, we are ok. Otherwise the compare
// can directly point to the phi; in this case adjust the compare so that
// it points to the incr by adjusting the limit.
......@@ -864,8 +869,10 @@ void IdealLoopTree::split_outer_loop( PhaseIdealLoop *phase ) {
Node *outer = new (phase->C, 3) LoopNode( ctl, _head->in(outer_idx) );
outer = igvn.register_new_node_with_optimizer(outer, _head);
phase->set_created_loop_node();
Node* pred = phase->clone_loop_predicates(ctl, outer);
// Outermost loop falls into '_head' loop
_head->set_req(LoopNode::EntryControl, outer);
_head->set_req(LoopNode::EntryControl, pred);
_head->del_req(outer_idx);
// Split all the Phis up between '_head' loop and 'outer' loop.
for (DUIterator_Fast jmax, j = _head->fast_outs(jmax); j < jmax; j++) {
......@@ -1103,12 +1110,13 @@ bool IdealLoopTree::beautify_loops( PhaseIdealLoop *phase ) {
// backedges into a private merge point and use the merge point as
// the one true backedge.
if( _head->req() > 3 ) {
// Merge the many backedges into a single backedge.
// Merge the many backedges into a single backedge but leave
// the hottest backedge as separate edge for the following peel.
merge_many_backedges( phase );
result = true;
}
// If I am a shared header (multiple backedges), peel off myself loop.
// If I have one hot backedge, peel off myself loop.
// I better be the outermost loop.
if( _head->req() > 3 ) {
split_outer_loop( phase );
......@@ -1433,9 +1441,9 @@ void IdealLoopTree::dump_head( ) const {
tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
if (_irreducible) tty->print(" IRREDUCIBLE");
if (UseLoopPredicate) {
Node* entry = _head->in(LoopNode::EntryControl);
if (entry != NULL && entry->is_Proj() &&
PhaseIdealLoop::is_uncommon_trap_if_pattern(entry->as_Proj(), Deoptimization::Reason_predicate)) {
Node* entry = PhaseIdealLoop::find_predicate_insertion_point(_head->in(LoopNode::EntryControl),
Deoptimization::Reason_predicate);
if (entry != NULL) {
tty->print(" predicated");
}
}
......@@ -1541,7 +1549,7 @@ void PhaseIdealLoop::eliminate_useless_predicates() {
//----------------------------build_and_optimize-------------------------------
// Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to
// its corresponding LoopNode. If 'optimize' is true, do some loop cleanups.
void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool do_loop_pred) {
void PhaseIdealLoop::build_and_optimize(bool do_split_ifs) {
ResourceMark rm;
int old_progress = C->major_progress();
......@@ -1573,6 +1581,13 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool do_loop_pred) {
// Do not need a safepoint at the top level
_ltree_root->_has_sfpt = 1;
// Initialize Dominators.
// Checked in clone_loop_predicate() during beautify_loops().
_idom_size = 0;
_idom = NULL;
_dom_depth = NULL;
_dom_stk = NULL;
// Empty pre-order array
allocate_preorders();
......@@ -1698,8 +1713,9 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool do_loop_pred) {
return;
}
// some parser-inserted loop predicates could never be used by loop
// predication. Eliminate them before loop optimization
// Some parser-inserted loop predicates could never be used by loop
// predication or they were moved away from loop during some optimizations.
// For example, peeling. Eliminate them before next loop optimizations.
if (UseLoopPredicate) {
eliminate_useless_predicates();
}
......@@ -1750,7 +1766,7 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool do_loop_pred) {
}
// Perform loop predication before iteration splitting
if (do_loop_pred && C->has_loops() && !C->major_progress()) {
if (C->has_loops() && !C->major_progress() && (C->predicate_count() > 0)) {
_ltree_root->_child->loop_predication(this);
}
......@@ -1793,8 +1809,20 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool do_loop_pred) {
C->set_major_progress();
}
// Convert scalar to superword operations
// Keep loop predicates and perform optimizations with them
// until no more loop optimizations could be done.
// After that switch predicates off and do more loop optimizations.
if (!C->major_progress() && (C->predicate_count() > 0)) {
C->cleanup_loop_predicates(_igvn);
#ifndef PRODUCT
if (TraceLoopOpts) {
tty->print_cr("PredicatesOff");
}
#endif
C->set_major_progress();
}
// Convert scalar to superword operations at the end of all loop opts.
if (UseSuperWord && C->has_loops() && !C->major_progress()) {
// SuperWord transform
SuperWord sw(this);
......
......@@ -706,11 +706,11 @@ private:
_dom_lca_tags(arena()), // Thread::resource_area
_verify_me(NULL),
_verify_only(true) {
build_and_optimize(false, false);
build_and_optimize(false);
}
// build the loop tree and perform any requested optimizations
void build_and_optimize(bool do_split_if, bool do_loop_pred);
void build_and_optimize(bool do_split_if);
public:
// Dominators for the sea of nodes
......@@ -721,13 +721,13 @@ public:
Node *dom_lca_internal( Node *n1, Node *n2 ) const;
// Compute the Ideal Node to Loop mapping
PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs, bool do_loop_pred) :
PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) :
PhaseTransform(Ideal_Loop),
_igvn(igvn),
_dom_lca_tags(arena()), // Thread::resource_area
_verify_me(NULL),
_verify_only(false) {
build_and_optimize(do_split_ifs, do_loop_pred);
build_and_optimize(do_split_ifs);
}
// Verify that verify_me made the same decisions as a fresh run.
......@@ -737,7 +737,7 @@ public:
_dom_lca_tags(arena()), // Thread::resource_area
_verify_me(verify_me),
_verify_only(false) {
build_and_optimize(false, false);
build_and_optimize(false);
}
// Build and verify the loop tree without modifying the graph. This
......@@ -830,7 +830,26 @@ public:
Deoptimization::DeoptReason reason);
void register_control(Node* n, IdealLoopTree *loop, Node* pred);
// Find a good location to insert a predicate
// Clone loop predicates to cloned loops (peeled, unswitched)
static ProjNode* clone_predicate(ProjNode* predicate_proj, Node* new_entry,
Deoptimization::DeoptReason reason,
PhaseIdealLoop* loop_phase,
PhaseIterGVN* igvn);
static ProjNode* move_predicate(ProjNode* predicate_proj, Node* new_entry,
Deoptimization::DeoptReason reason,
PhaseIdealLoop* loop_phase,
PhaseIterGVN* igvn);
static Node* clone_loop_predicates(Node* old_entry, Node* new_entry,
bool move_predicates,
PhaseIdealLoop* loop_phase,
PhaseIterGVN* igvn);
Node* clone_loop_predicates(Node* old_entry, Node* new_entry);
Node* move_loop_predicates(Node* old_entry, Node* new_entry);
void eliminate_loop_predicates(Node* entry);
static Node* skip_loop_predicates(Node* entry);
// Find a good location to insert a predicate
static ProjNode* find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason);
// Find a predicate
static Node* find_predicate(Node* entry);
......
......@@ -2139,9 +2139,12 @@ bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& p
//
// orig
//
// stmt1
// |
// v
// stmt1
// |
// v
// loop predicate
// |
// v
// loop<----+
// | |
// stmt2 |
......@@ -2172,6 +2175,9 @@ bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& p
// after clone loop
//
// stmt1
// |
// v
// loop predicate
// / \
// clone / \ orig
// / \
......@@ -2210,12 +2216,15 @@ bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& p
// after partial peel
//
// stmt1
// |
// v
// loop predicate
// /
// clone / orig
// / TOP
// / \
// v v
// TOP->region region----+
// TOP->loop loop----+
// | | |
// stmt2 stmt2 |
// | | |
......@@ -2253,13 +2262,17 @@ bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& p
// stmt1
// |
// v
// stmt2 clone
// |
// v
// ........> ifA clone
// : / |
// dom / |
// : v v
// : false true
// : | |
// : | stmt2 clone
// : | v
// : | loop predicate
// : | |
// : | v
// : | newloop<-----+
......@@ -2289,6 +2302,7 @@ bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& p
//
bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
if (!loop->_head->is_Loop()) {
return false; }
......@@ -2316,6 +2330,7 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
}
}
Node* entry = head->in(LoopNode::EntryControl);
int dd = dom_depth(head);
// Step 1: find cut point
......@@ -2612,6 +2627,8 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
// Backedge of the surviving new_head (the clone) is original last_peel
_igvn.hash_delete(new_head_clone);
Node* new_entry = move_loop_predicates(entry, new_head_clone->in(LoopNode::EntryControl));
new_head_clone->set_req(LoopNode::EntryControl, new_entry);
new_head_clone->set_req(LoopNode::LoopBackControl, last_peel);
_igvn._worklist.push(new_head_clone);
......
......@@ -471,6 +471,13 @@ public:
_delay_transform = delay;
}
// Clone loop predicates. Defined in loopTransform.cpp.
Node* clone_loop_predicates(Node* old_entry, Node* new_entry);
Node* move_loop_predicates(Node* old_entry, Node* new_entry);
// Create a new if below new_entry for the predicate to be cloned
ProjNode* create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
Deoptimization::DeoptReason reason);
#ifndef PRODUCT
protected:
// Sub-quadratic implementation of VerifyIterativeGVN.
......
......@@ -399,6 +399,9 @@ void PhaseIdealLoop::do_split_if( Node *iff ) {
#ifndef PRODUCT
if( PrintOpto && VerifyLoopOptimizations )
tty->print_cr("Split-if");
if (TraceLoopOpts) {
tty->print_cr("SplitIf");
}
#endif
C->set_major_progress();
Node *region = iff->in(0);
......
......@@ -1132,6 +1132,13 @@ void SuperWord::co_locate_pack(Node_List* pk) {
void SuperWord::output() {
if (_packset.length() == 0) return;
#ifndef PRODUCT
if (TraceLoopOpts) {
tty->print("SuperWord ");
lpt()->dump_head();
}
#endif
// MUST ENSURE main loop's initial value is properly aligned:
// (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
......
......@@ -32,6 +32,7 @@
//------------------------------VectorNode--------------------------------------
// Vector Operation
class VectorNode : public Node {
// Reports the size of this node object as sizeof(*this).
virtual uint size_of() const {
  return sizeof(*this);
}
protected:
uint _length; // vector length
virtual BasicType elt_basic_type() const = 0; // Vector element basic type
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册