提交 6f9e0690 编写于 作者: R roland

7197327: 40% regression on 8 b41 comp 8 b40 on specjvm2008.mpegaudio on oob

Summary: Add support for expensive nodes.
Reviewed-by: kvn
上级 54dabe91
...@@ -618,6 +618,9 @@ ...@@ -618,6 +618,9 @@
\ \
product(intx, LiveNodeCountInliningCutoff, 20000, \ product(intx, LiveNodeCountInliningCutoff, 20000, \
"max number of live nodes in a method") \ "max number of live nodes in a method") \
\
diagnostic(bool, OptimizeExpensiveOps, true, \
"Find best control for expensive operations") \
C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG) C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
......
...@@ -409,6 +409,13 @@ void Compile::remove_useless_nodes(Unique_Node_List &useful) { ...@@ -409,6 +409,13 @@ void Compile::remove_useless_nodes(Unique_Node_List &useful) {
remove_macro_node(n); remove_macro_node(n);
} }
} }
// Remove useless expensive node
for (int i = C->expensive_count()-1; i >= 0; i--) {
Node* n = C->expensive_node(i);
if (!useful.member(n)) {
remove_expensive_node(n);
}
}
// clean up the late inline lists // clean up the late inline lists
remove_useless_late_inlines(&_string_late_inlines, useful); remove_useless_late_inlines(&_string_late_inlines, useful);
remove_useless_late_inlines(&_late_inlines, useful); remove_useless_late_inlines(&_late_inlines, useful);
...@@ -1061,6 +1068,7 @@ void Compile::Init(int aliaslevel) { ...@@ -1061,6 +1068,7 @@ void Compile::Init(int aliaslevel) {
_intrinsics = NULL; _intrinsics = NULL;
_macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL); _macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
_predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL); _predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
_expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
register_library_intrinsics(); register_library_intrinsics();
} }
...@@ -1927,6 +1935,10 @@ void Compile::Optimize() { ...@@ -1927,6 +1935,10 @@ void Compile::Optimize() {
if (failing()) return; if (failing()) return;
// No more new expensive nodes will be added to the list from here
// so keep only the actual candidates for optimizations.
cleanup_expensive_nodes(igvn);
// Perform escape analysis // Perform escape analysis
if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) { if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
if (has_loops()) { if (has_loops()) {
...@@ -3010,6 +3022,15 @@ bool Compile::final_graph_reshaping() { ...@@ -3010,6 +3022,15 @@ bool Compile::final_graph_reshaping() {
return true; return true;
} }
// Expensive nodes have their control input set to prevent the GVN
// from freely commoning them. There's no GVN beyond this point so
// no need to keep the control input. We want the expensive nodes to
// be freely moved to the least frequent code path by gcm.
assert(OptimizeExpensiveOps || expensive_count() == 0, "optimization off but list non empty?");
for (int i = 0; i < expensive_count(); i++) {
_expensive_nodes->at(i)->set_req(0, NULL);
}
Final_Reshape_Counts frc; Final_Reshape_Counts frc;
// Visit everybody reachable! // Visit everybody reachable!
...@@ -3525,3 +3546,126 @@ void Compile::dump_inlining() { ...@@ -3525,3 +3546,126 @@ void Compile::dump_inlining() {
} }
} }
} }
// Three-way comparison of two expensive nodes. Nodes are ordered by opcode
// first and then by the addresses of their inputs 1..req()-1; input 0 (the
// control input) is not compared. Returns -1, 0 or 1.
int Compile::cmp_expensive_nodes(Node* n1, Node* n2) {
  const int opc1 = n1->Opcode();
  const int opc2 = n2->Opcode();
  if (opc1 != opc2) {
    return (opc1 < opc2) ? -1 : 1;
  }

  // Same opcode, so both nodes are expected to have the same number of inputs.
  assert(n1->req() == n2->req(), err_msg_res("can't compare %s nodes: n1->req() = %d, n2->req() = %d", NodeClassNames[n1->Opcode()], n1->req(), n2->req()));

  const uint cnt = n1->req();
  for (uint idx = 1; idx < cnt; idx++) {
    Node* in1 = n1->in(idx);
    Node* in2 = n2->in(idx);
    if (in1 != in2) {
      return (in1 < in2) ? -1 : 1;
    }
  }
  return 0;
}
// Pointer-to-element flavour of the comparison, in the shape handed to
// _expensive_nodes->sort(). Simply dereferences both arguments and defers to
// the Node*/Node* overload.
int Compile::cmp_expensive_nodes(Node** n1p, Node** n2p) {
  return cmp_expensive_nodes(*n1p, *n2p);
}
// Sort the expensive-node list with cmp_expensive_nodes so that similar nodes
// end up adjacent. The sort is skipped when the list is already in order.
void Compile::sort_expensive_nodes() {
  if (expensive_nodes_sorted()) {
    return;
  }
  _expensive_nodes->sort(cmp_expensive_nodes);
}
bool Compile::expensive_nodes_sorted() const {
for (int i = 1; i < _expensive_nodes->length(); i++) {
if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i-1)) < 0) {
return false;
}
}
return true;
}
// Decide whether the expensive-node list holds anything worth processing.
// As a side effect, unreachable nodes are dropped from the list and the list
// is sorted. Returns true only if at least two remaining nodes compare equal
// under cmp_expensive_nodes (same opcode, same data inputs).
bool Compile::should_optimize_expensive_nodes(PhaseIterGVN &igvn) {
  if (_expensive_nodes->length() == 0) {
    return false;
  }

  assert(OptimizeExpensiveOps, "optimization off?");

  // Compact the list in place, keeping only the nodes that are still reachable.
  int kept = 0;
  for (int idx = 0; idx < _expensive_nodes->length(); idx++) {
    Node* candidate = _expensive_nodes->at(idx);
    if (candidate->is_unreachable(igvn)) {
      continue;
    }
    assert(candidate->is_expensive(), "should be expensive");
    _expensive_nodes->at_put(kept++, candidate);
  }
  _expensive_nodes->trunc_to(kept);

  // Sorting puts similar nodes side by side, so a single pass over adjacent
  // pairs is enough to spot two nodes of the same kind with the same inputs.
  sort_expensive_nodes();

  for (int idx = 1; idx < _expensive_nodes->length(); idx++) {
    if (cmp_expensive_nodes(_expensive_nodes->adr_at(idx - 1), _expensive_nodes->adr_at(idx)) == 0) {
      return true;
    }
  }
  return false;
}
// Prune the expensive-node list so that it only holds nodes sharing their
// opcode with at least one other entry. A node that is the only one with its
// opcode is dropped from the list and has its control input (input 0) cleared.
// Note that grouping here looks at Opcode() only, not at the data inputs.
// igvn: each control-input change is bracketed by hash_delete()/hash_insert()
// on this IGVN.
void Compile::cleanup_expensive_nodes(PhaseIterGVN &igvn) {
if (_expensive_nodes->length() == 0) {
return;
}
assert(OptimizeExpensiveOps, "optimization off?");
// Sort to bring similar nodes next to each other and clear the
// control input of nodes for which there's only a single copy.
sort_expensive_nodes();
// j is the next write position for a kept node: the list is compacted in place.
int j = 0;
// identical is non-zero while the previous node had the same opcode as node i.
int identical = 0;
int i = 0;
// Compare every node with its successor; the last node is handled after the loop.
for (; i < _expensive_nodes->length()-1; i++) {
assert(j <= i, "can't write beyond current index");
if (_expensive_nodes->at(i)->Opcode() == _expensive_nodes->at(i+1)->Opcode()) {
// Same opcode as the next node: keep it.
identical++;
_expensive_nodes->at_put(j++, _expensive_nodes->at(i));
continue;
}
if (identical > 0) {
// Last node of a run of equal opcodes: keep it and start a new run.
_expensive_nodes->at_put(j++, _expensive_nodes->at(i));
identical = 0;
} else {
// Only node with this opcode: clear its control input and do not keep it.
Node* n = _expensive_nodes->at(i);
igvn.hash_delete(n);
n->set_req(0, NULL);
igvn.hash_insert(n);
}
}
// i now indexes the last element; apply the same keep/clear rule to it.
if (identical > 0) {
_expensive_nodes->at_put(j++, _expensive_nodes->at(i));
} else if (_expensive_nodes->length() >= 1) {
Node* n = _expensive_nodes->at(i);
igvn.hash_delete(n);
n->set_req(0, NULL);
igvn.hash_insert(n);
}
// Drop everything past the kept prefix.
_expensive_nodes->trunc_to(j);
}
// Register a freshly created expensive node with the compilation.
// With OptimizeExpensiveOps on, the node is appended to the expensive-node
// list. With it off, the node is not tracked and its control input is
// cleared instead.
void Compile::add_expensive_node(Node * n) {
  assert(!_expensive_nodes->contains(n), "duplicate entry in expensive list");
  assert(n->is_expensive(), "expensive nodes with non-null control here only");
  assert(!n->is_CFG() && !n->is_Mem(), "no cfg or memory nodes here");

  if (!OptimizeExpensiveOps) {
    // Clear control input and let IGVN optimize expensive nodes if
    // OptimizeExpensiveOps is off.
    n->set_req(0, NULL);
    return;
  }
  _expensive_nodes->append(n);
}
...@@ -314,6 +314,7 @@ class Compile : public Phase { ...@@ -314,6 +314,7 @@ class Compile : public Phase {
GrowableArray<CallGenerator*>* _intrinsics; // List of intrinsics. GrowableArray<CallGenerator*>* _intrinsics; // List of intrinsics.
GrowableArray<Node*>* _macro_nodes; // List of nodes which need to be expanded before matching. GrowableArray<Node*>* _macro_nodes; // List of nodes which need to be expanded before matching.
GrowableArray<Node*>* _predicate_opaqs; // List of Opaque1 nodes for the loop predicates. GrowableArray<Node*>* _predicate_opaqs; // List of Opaque1 nodes for the loop predicates.
GrowableArray<Node*>* _expensive_nodes; // List of nodes that are expensive to compute and that we'd better not let the GVN freely common
ConnectionGraph* _congraph; ConnectionGraph* _congraph;
#ifndef PRODUCT #ifndef PRODUCT
IdealGraphPrinter* _printer; IdealGraphPrinter* _printer;
...@@ -398,6 +399,13 @@ class Compile : public Phase { ...@@ -398,6 +399,13 @@ class Compile : public Phase {
GrowableArray<PrintInliningBuffer>* _print_inlining_list; GrowableArray<PrintInliningBuffer>* _print_inlining_list;
int _print_inlining; int _print_inlining;
// Only keep nodes in the expensive node list that need to be optimized
void cleanup_expensive_nodes(PhaseIterGVN &igvn);
// Use for sorting expensive nodes to bring similar nodes together
static int cmp_expensive_nodes(Node** n1, Node** n2);
// Expensive nodes list already sorted?
bool expensive_nodes_sorted() const;
public: public:
outputStream* print_inlining_stream() const { outputStream* print_inlining_stream() const {
...@@ -573,8 +581,10 @@ class Compile : public Phase { ...@@ -573,8 +581,10 @@ class Compile : public Phase {
int macro_count() { return _macro_nodes->length(); } int macro_count() { return _macro_nodes->length(); }
int predicate_count() { return _predicate_opaqs->length();} int predicate_count() { return _predicate_opaqs->length();}
// Number of nodes currently on the expensive-node list.
int expensive_count() { return _expensive_nodes->length(); }
Node* macro_node(int idx) { return _macro_nodes->at(idx); } Node* macro_node(int idx) { return _macro_nodes->at(idx); }
Node* predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);} Node* predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);}
// Expensive-node list entry at position idx.
Node* expensive_node(int idx) { return _expensive_nodes->at(idx); }
ConnectionGraph* congraph() { return _congraph;} ConnectionGraph* congraph() { return _congraph;}
void set_congraph(ConnectionGraph* congraph) { _congraph = congraph;} void set_congraph(ConnectionGraph* congraph) { _congraph = congraph;}
void add_macro_node(Node * n) { void add_macro_node(Node * n) {
...@@ -592,6 +602,12 @@ class Compile : public Phase { ...@@ -592,6 +602,12 @@ class Compile : public Phase {
_predicate_opaqs->remove(n); _predicate_opaqs->remove(n);
} }
} }
void add_expensive_node(Node * n);
// Take n off the expensive-node list; does nothing if n is not on the list.
void remove_expensive_node(Node * n) {
  if (!_expensive_nodes->contains(n)) {
    return;
  }
  _expensive_nodes->remove(n);
}
void add_predicate_opaq(Node * n) { void add_predicate_opaq(Node * n) {
assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1"); assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1");
assert(_macro_nodes->contains(n), "should have already been in macro list"); assert(_macro_nodes->contains(n), "should have already been in macro list");
...@@ -604,6 +620,13 @@ class Compile : public Phase { ...@@ -604,6 +620,13 @@ class Compile : public Phase {
return _predicate_opaqs->contains(n); return _predicate_opaqs->contains(n);
} }
// Are there candidate expensive nodes for optimization?
bool should_optimize_expensive_nodes(PhaseIterGVN &igvn);
// Check whether n1 and n2 are similar
static int cmp_expensive_nodes(Node* n1, Node* n2);
// Sort expensive nodes to locate similar expensive nodes
void sort_expensive_nodes();
// Compilation environment. // Compilation environment.
Arena* comp_arena() { return &_comp_arena; } Arena* comp_arena() { return &_comp_arena; }
ciEnv* env() const { return _env; } ciEnv* env() const { return _env; }
......
...@@ -1653,7 +1653,7 @@ void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFu ...@@ -1653,7 +1653,7 @@ void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFu
// really odd corner cases (+/- Infinity). Just uncommon-trap them. // really odd corner cases (+/- Infinity). Just uncommon-trap them.
bool LibraryCallKit::inline_exp() { bool LibraryCallKit::inline_exp() {
Node* arg = round_double_node(argument(0)); Node* arg = round_double_node(argument(0));
Node* n = _gvn.transform(new (C) ExpDNode(0, arg)); Node* n = _gvn.transform(new (C) ExpDNode(C, control(), arg));
finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
...@@ -1688,7 +1688,7 @@ bool LibraryCallKit::inline_pow() { ...@@ -1688,7 +1688,7 @@ bool LibraryCallKit::inline_pow() {
if (!too_many_traps(Deoptimization::Reason_intrinsic)) { if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
// Short form: skip the fancy tests and just check for NaN result. // Short form: skip the fancy tests and just check for NaN result.
result = _gvn.transform(new (C) PowDNode(0, x, y)); result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
} else { } else {
// If this inlining ever returned NaN in the past, include all // If this inlining ever returned NaN in the past, include all
// checks + call to the runtime. // checks + call to the runtime.
...@@ -1715,7 +1715,7 @@ bool LibraryCallKit::inline_pow() { ...@@ -1715,7 +1715,7 @@ bool LibraryCallKit::inline_pow() {
Node *complex_path = _gvn.transform( new (C) IfTrueNode(if1) ); Node *complex_path = _gvn.transform( new (C) IfTrueNode(if1) );
// Set fast path result // Set fast path result
Node *fast_result = _gvn.transform( new (C) PowDNode(0, x, y) ); Node *fast_result = _gvn.transform( new (C) PowDNode(C, control(), x, y) );
phi->init_req(3, fast_result); phi->init_req(3, fast_result);
// Complex path // Complex path
...@@ -1775,7 +1775,7 @@ bool LibraryCallKit::inline_pow() { ...@@ -1775,7 +1775,7 @@ bool LibraryCallKit::inline_pow() {
// abs(x) // abs(x)
Node *absx=_gvn.transform( new (C) AbsDNode(x)); Node *absx=_gvn.transform( new (C) AbsDNode(x));
// abs(x)^y // abs(x)^y
Node *absxpowy = _gvn.transform( new (C) PowDNode(0, absx, y) ); Node *absxpowy = _gvn.transform( new (C) PowDNode(C, control(), absx, y) );
// -abs(x)^y // -abs(x)^y
Node *negabsxpowy = _gvn.transform(new (C) NegDNode (absxpowy)); Node *negabsxpowy = _gvn.transform(new (C) NegDNode (absxpowy));
// (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y) // (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
......
...@@ -88,9 +88,9 @@ Node *PhaseIdealLoop::get_early_ctrl( Node *n ) { ...@@ -88,9 +88,9 @@ Node *PhaseIdealLoop::get_early_ctrl( Node *n ) {
assert( !n->is_Phi() && !n->is_CFG(), "this code only handles data nodes" ); assert( !n->is_Phi() && !n->is_CFG(), "this code only handles data nodes" );
uint i; uint i;
Node *early; Node *early;
if( n->in(0) ) { if (n->in(0) && !n->is_expensive()) {
early = n->in(0); early = n->in(0);
if( !early->is_CFG() ) // Might be a non-CFG multi-def if (!early->is_CFG()) // Might be a non-CFG multi-def
early = get_ctrl(early); // So treat input as a straight data input early = get_ctrl(early); // So treat input as a straight data input
i = 1; i = 1;
} else { } else {
...@@ -99,28 +99,28 @@ Node *PhaseIdealLoop::get_early_ctrl( Node *n ) { ...@@ -99,28 +99,28 @@ Node *PhaseIdealLoop::get_early_ctrl( Node *n ) {
} }
uint e_d = dom_depth(early); uint e_d = dom_depth(early);
assert( early, "" ); assert( early, "" );
for( ; i < n->req(); i++ ) { for (; i < n->req(); i++) {
Node *cin = get_ctrl(n->in(i)); Node *cin = get_ctrl(n->in(i));
assert( cin, "" ); assert( cin, "" );
// Keep deepest dominator depth // Keep deepest dominator depth
uint c_d = dom_depth(cin); uint c_d = dom_depth(cin);
if( c_d > e_d ) { // Deeper guy? if (c_d > e_d) { // Deeper guy?
early = cin; // Keep deepest found so far early = cin; // Keep deepest found so far
e_d = c_d; e_d = c_d;
} else if( c_d == e_d && // Same depth? } else if (c_d == e_d && // Same depth?
early != cin ) { // If not equal, must use slower algorithm early != cin) { // If not equal, must use slower algorithm
// If same depth but not equal, one _must_ dominate the other // If same depth but not equal, one _must_ dominate the other
// and we want the deeper (i.e., dominated) guy. // and we want the deeper (i.e., dominated) guy.
Node *n1 = early; Node *n1 = early;
Node *n2 = cin; Node *n2 = cin;
while( 1 ) { while (1) {
n1 = idom(n1); // Walk up until break cycle n1 = idom(n1); // Walk up until break cycle
n2 = idom(n2); n2 = idom(n2);
if( n1 == cin || // Walked early up to cin if (n1 == cin || // Walked early up to cin
dom_depth(n2) < c_d ) dom_depth(n2) < c_d)
break; // early is deeper; keep him break; // early is deeper; keep him
if( n2 == early || // Walked cin up to early if (n2 == early || // Walked cin up to early
dom_depth(n1) < c_d ) { dom_depth(n1) < c_d) {
early = cin; // cin is deeper; keep him early = cin; // cin is deeper; keep him
break; break;
} }
...@@ -132,9 +132,108 @@ Node *PhaseIdealLoop::get_early_ctrl( Node *n ) { ...@@ -132,9 +132,108 @@ Node *PhaseIdealLoop::get_early_ctrl( Node *n ) {
// Return earliest legal location // Return earliest legal location
assert(early == find_non_split_ctrl(early), "unexpected early control"); assert(early == find_non_split_ctrl(early), "unexpected early control");
if (n->is_expensive()) {
assert(n->in(0), "should have control input");
early = get_early_ctrl_for_expensive(n, early);
}
return early; return early;
} }
//------------------------------get_early_ctrl_for_expensive---------------------------------
// Move node up the dominator tree as high as legal while still beneficial.
//   n        - expensive node; must have a CFG node as control input (in(0)).
//   earliest - earliest legal control for n; the walk never goes above it
//              (measured by dominator depth).
// Returns the control chosen for n. If that differs from n's current control
// input, n's input 0 is updated (with the IGVN hash table kept in sync).
// If n's current control is already shallower than 'earliest', 'earliest' is
// returned and n is left unchanged.
Node *PhaseIdealLoop::get_early_ctrl_for_expensive(Node *n, Node* earliest) {
assert(n->in(0) && n->is_expensive(), "expensive node with control input here");
assert(OptimizeExpensiveOps, "optimization off?");
Node* ctl = n->in(0);
assert(ctl->is_CFG(), "expensive input 0 must be cfg");
uint min_dom_depth = dom_depth(earliest);
#ifdef ASSERT
// The current control and the earliest control must lie on one dominator path.
if (!is_dominator(ctl, earliest) && !is_dominator(earliest, ctl)) {
dump_bad_graph("Bad graph detected in get_early_ctrl_for_expensive", n, earliest, ctl);
assert(false, "Bad graph detected in get_early_ctrl_for_expensive");
}
#endif
if (dom_depth(ctl) < min_dom_depth) {
return earliest;
}
// Walk up from ctl one step at a time; 'next' is the candidate for the next
// step and 'ctl' is only advanced once 'next' has been accepted.
while (1) {
Node *next = ctl;
// Moving the node out of a loop on the projection of an If
// confuses loop predication. So once we hit a Loop in an If branch
// that doesn't branch to an UNC, we stop. The code that processes
// expensive nodes will notice the loop and skip over it to try to
// move the node further up.
if (ctl->is_CountedLoop() && ctl->in(1) != NULL && ctl->in(1)->in(0) != NULL && ctl->in(1)->in(0)->is_If()) {
if (!is_uncommon_trap_if_pattern(ctl->in(1)->as_Proj(), Deoptimization::Reason_none)) {
break;
}
next = idom(ctl->in(1)->in(0));
} else if (ctl->is_Proj()) {
// We only move it up along a projection if the projection is
// the single control projection for its parent: same code path,
// if it's an If with UNC or fallthrough of a call.
Node* parent_ctl = ctl->in(0);
if (parent_ctl == NULL) {
break;
} else if (parent_ctl->is_CountedLoopEnd() && parent_ctl->as_CountedLoopEnd()->loopnode() != NULL) {
// Projection of a counted loop end: continue from the loop's init control.
next = parent_ctl->as_CountedLoopEnd()->loopnode()->init_control();
} else if (parent_ctl->is_If()) {
if (!is_uncommon_trap_if_pattern(ctl->as_Proj(), Deoptimization::Reason_none)) {
break;
}
assert(idom(ctl) == parent_ctl, "strange");
next = idom(parent_ctl);
} else if (ctl->is_CatchProj()) {
// Only the fall-through projection of a call's catch may be crossed.
if (ctl->as_Proj()->_con != CatchProjNode::fall_through_index) {
break;
}
assert(parent_ctl->in(0)->in(0)->is_Call(), "strange graph");
next = parent_ctl->in(0)->in(0)->in(0);
} else {
// Check if parent control has a single projection (this
// control is the only possible successor of the parent
// control). If so, we can try to move the node above the
// parent control.
int nb_ctl_proj = 0;
for (DUIterator_Fast imax, i = parent_ctl->fast_outs(imax); i < imax; i++) {
Node *p = parent_ctl->fast_out(i);
if (p->is_Proj() && p->is_CFG()) {
nb_ctl_proj++;
if (nb_ctl_proj > 1) {
break;
}
}
}
if (nb_ctl_proj > 1) {
break;
}
assert(parent_ctl->is_Start() || parent_ctl->is_MemBar() || parent_ctl->is_Call(), "unexpected node");
assert(idom(ctl) == parent_ctl, "strange");
next = idom(parent_ctl);
}
} else {
next = idom(ctl);
}
// Stop before stepping onto Root/Start or above the earliest legal control.
if (next->is_Root() || next->is_Start() || dom_depth(next) < min_dom_depth) {
break;
}
ctl = next;
}
// Record the new control on the node, keeping the IGVN hash table consistent.
if (ctl != n->in(0)) {
_igvn.hash_delete(n);
n->set_req(0, ctl);
_igvn.hash_insert(n);
}
return ctl;
}
//------------------------------set_early_ctrl--------------------------------- //------------------------------set_early_ctrl---------------------------------
// Set earliest legal control // Set earliest legal control
void PhaseIdealLoop::set_early_ctrl( Node *n ) { void PhaseIdealLoop::set_early_ctrl( Node *n ) {
...@@ -1892,6 +1991,98 @@ void PhaseIdealLoop::eliminate_useless_predicates() { ...@@ -1892,6 +1991,98 @@ void PhaseIdealLoop::eliminate_useless_predicates() {
} }
} }
//------------------------process_expensive_nodes-----------------------------
// Expensive nodes have their control input set to prevent the GVN
// from commoning them and as a result forcing the resulting node to
// be in a more frequent path. Use CFG information here, to change the
// control inputs so that some expensive nodes can be commoned while
// not executed more frequently.
// Returns true if the control input of at least one expensive node was
// changed. Nodes whose control was changed, and pairs that already share a
// control, are pushed on the IGVN worklist.
bool PhaseIdealLoop::process_expensive_nodes() {
assert(OptimizeExpensiveOps, "optimization off?");
// Sort nodes to bring similar nodes together
C->sort_expensive_nodes();
bool progress = false;
// Each outer iteration handles one group [start, end) of nodes that compare
// equal under cmp_expensive_nodes; i is advanced inside the body.
for (int i = 0; i < C->expensive_count(); ) {
Node* n = C->expensive_node(i);
int start = i;
// Find nodes similar to n
i++;
for (; i < C->expensive_count() && Compile::cmp_expensive_nodes(n, C->expensive_node(i)) == 0; i++);
int end = i;
// And compare them two by two
for (int j = start; j < end; j++) {
Node* n1 = C->expensive_node(j);
if (is_node_unreachable(n1)) {
continue;
}
for (int k = j+1; k < end; k++) {
Node* n2 = C->expensive_node(k);
if (is_node_unreachable(n2)) {
continue;
}
assert(n1 != n2, "should be pair of nodes");
Node* c1 = n1->in(0);
Node* c2 = n2->in(0);
Node* parent_c1 = c1;
Node* parent_c2 = c2;
// The call to get_early_ctrl_for_expensive() moves the
// expensive nodes up but stops at loops that are in an if
// branch. See whether we can exit the loop and move above the
// If.
if (c1->is_Loop()) {
parent_c1 = c1->in(1);
}
if (c2->is_Loop()) {
parent_c2 = c2->in(1);
}
// Same (parent) control already: nothing to move, just let IGVN revisit
// both nodes. This case does not count as progress.
if (parent_c1 == parent_c2) {
_igvn._worklist.push(n1);
_igvn._worklist.push(n2);
continue;
}
// Look for identical expensive node up the dominator chain.
if (is_dominator(c1, c2)) {
c2 = c1;
} else if (is_dominator(c2, c1)) {
c1 = c2;
} else if (parent_c1->is_Proj() && parent_c1->in(0)->is_If() &&
parent_c2->is_Proj() && parent_c1->in(0) == parent_c2->in(0)) {
// Both branches have the same expensive node so move it up
// before the if.
c1 = c2 = idom(parent_c1->in(0));
}
// Do the actual moves
if (n1->in(0) != c1) {
_igvn.hash_delete(n1);
n1->set_req(0, c1);
_igvn.hash_insert(n1);
_igvn._worklist.push(n1);
progress = true;
}
if (n2->in(0) != c2) {
_igvn.hash_delete(n2);
n2->set_req(0, c2);
_igvn.hash_insert(n2);
_igvn._worklist.push(n2);
progress = true;
}
}
}
}
return progress;
}
//============================================================================= //=============================================================================
//----------------------------build_and_optimize------------------------------- //----------------------------build_and_optimize-------------------------------
// Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to // Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to
...@@ -1960,7 +2151,9 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool skip_loop_opts) ...@@ -1960,7 +2151,9 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool skip_loop_opts)
} }
// Nothing to do, so get out // Nothing to do, so get out
if( !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only ) { bool stop_early = !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only;
bool do_expensive_nodes = C->should_optimize_expensive_nodes(_igvn);
if (stop_early && !do_expensive_nodes) {
_igvn.optimize(); // Cleanup NeverBranches _igvn.optimize(); // Cleanup NeverBranches
return; return;
} }
...@@ -2058,6 +2251,21 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool skip_loop_opts) ...@@ -2058,6 +2251,21 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool skip_loop_opts)
return; return;
} }
if (stop_early) {
assert(do_expensive_nodes, "why are we here?");
if (process_expensive_nodes()) {
// If we made some progress when processing expensive nodes then
// the IGVN may modify the graph in a way that will allow us to
// make some more progress: we need to try processing expensive
// nodes again.
C->set_major_progress();
}
_igvn.optimize();
return;
}
// Some parser-inserted loop predicates could never be used by loop // Some parser-inserted loop predicates could never be used by loop
// predication or they were moved away from loop during some optimizations. // predication or they were moved away from loop during some optimizations.
// For example, peeling. Eliminate them before next loop optimizations. // For example, peeling. Eliminate them before next loop optimizations.
...@@ -2120,6 +2328,10 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool skip_loop_opts) ...@@ -2120,6 +2328,10 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool skip_loop_opts)
NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); ); NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
} }
if (!C->major_progress() && do_expensive_nodes && process_expensive_nodes()) {
C->set_major_progress();
}
// Perform loop predication before iteration splitting // Perform loop predication before iteration splitting
if (C->has_loops() && !C->major_progress() && (C->predicate_count() > 0)) { if (C->has_loops() && !C->major_progress() && (C->predicate_count() > 0)) {
_ltree_root->_child->loop_predication(this); _ltree_root->_child->loop_predication(this);
...@@ -3299,7 +3511,7 @@ void PhaseIdealLoop::build_loop_late_post( Node *n ) { ...@@ -3299,7 +3511,7 @@ void PhaseIdealLoop::build_loop_late_post( Node *n ) {
#ifdef ASSERT #ifdef ASSERT
if (legal->is_Start() && !early->is_Root()) { if (legal->is_Start() && !early->is_Root()) {
// Bad graph. Print idom path and fail. // Bad graph. Print idom path and fail.
dump_bad_graph(n, early, LCA); dump_bad_graph("Bad graph detected in build_loop_late", n, early, LCA);
assert(false, "Bad graph detected in build_loop_late"); assert(false, "Bad graph detected in build_loop_late");
} }
#endif #endif
...@@ -3350,8 +3562,8 @@ void PhaseIdealLoop::build_loop_late_post( Node *n ) { ...@@ -3350,8 +3562,8 @@ void PhaseIdealLoop::build_loop_late_post( Node *n ) {
} }
#ifdef ASSERT #ifdef ASSERT
void PhaseIdealLoop::dump_bad_graph(Node* n, Node* early, Node* LCA) { void PhaseIdealLoop::dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA) {
tty->print_cr( "Bad graph detected in build_loop_late"); tty->print_cr(msg);
tty->print("n: "); n->dump(); tty->print("n: "); n->dump();
tty->print("early(n): "); early->dump(); tty->print("early(n): "); early->dump();
if (n->in(0) != NULL && !n->in(0)->is_top() && if (n->in(0) != NULL && !n->in(0)->is_top() &&
......
...@@ -263,9 +263,18 @@ public: ...@@ -263,9 +263,18 @@ public:
bool stride_is_con() const { Node *tmp = stride (); return (tmp != NULL && tmp->is_Con()); } bool stride_is_con() const { Node *tmp = stride (); return (tmp != NULL && tmp->is_Con()); }
BoolTest::mask test_trip() const { return in(TestValue)->as_Bool()->_test._test; } BoolTest::mask test_trip() const { return in(TestValue)->as_Bool()->_test._test; }
CountedLoopNode *loopnode() const { CountedLoopNode *loopnode() const {
// The CountedLoopNode that goes with this CountedLoopEndNode may
// have been optimized out by the IGVN so be cautious with the
// pattern matching on the graph
if (phi() == NULL) {
return NULL;
}
Node *ln = phi()->in(0); Node *ln = phi()->in(0);
assert( ln->Opcode() == Op_CountedLoop, "malformed loop" ); if (ln->is_CountedLoop() && ln->as_CountedLoop()->loopexit() == this) {
return (CountedLoopNode*)ln; } return (CountedLoopNode*)ln;
}
return NULL;
}
#ifndef PRODUCT #ifndef PRODUCT
virtual void dump_spec(outputStream *st) const; virtual void dump_spec(outputStream *st) const;
...@@ -598,6 +607,7 @@ public: ...@@ -598,6 +607,7 @@ public:
// check if transform created new nodes that need _ctrl recorded // check if transform created new nodes that need _ctrl recorded
Node *get_late_ctrl( Node *n, Node *early ); Node *get_late_ctrl( Node *n, Node *early );
Node *get_early_ctrl( Node *n ); Node *get_early_ctrl( Node *n );
Node *get_early_ctrl_for_expensive(Node *n, Node* earliest);
void set_early_ctrl( Node *n ); void set_early_ctrl( Node *n );
void set_subtree_ctrl( Node *root ); void set_subtree_ctrl( Node *root );
void set_ctrl( Node *n, Node *ctrl ) { void set_ctrl( Node *n, Node *ctrl ) {
...@@ -905,6 +915,16 @@ public: ...@@ -905,6 +915,16 @@ public:
void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1); void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
void eliminate_useless_predicates(); void eliminate_useless_predicates();
// Change the control input of expensive nodes to allow commoning by
// IGVN when it is guaranteed to not result in a more frequent
// execution of the expensive node. Return true if progress.
bool process_expensive_nodes();
// A node is treated as unreachable when has_node(n) is false for this phase,
// or when the node itself reports being unreachable for the current IGVN.
bool is_node_unreachable(Node *n) const {
  if (!has_node(n)) {
    return true;
  }
  return n->is_unreachable(_igvn);
}
// Eliminate range-checks and other trip-counter vs loop-invariant tests. // Eliminate range-checks and other trip-counter vs loop-invariant tests.
void do_range_check( IdealLoopTree *loop, Node_List &old_new ); void do_range_check( IdealLoopTree *loop, Node_List &old_new );
...@@ -1043,7 +1063,7 @@ public: ...@@ -1043,7 +1063,7 @@ public:
void register_new_node( Node *n, Node *blk ); void register_new_node( Node *n, Node *blk );
#ifdef ASSERT #ifdef ASSERT
void dump_bad_graph(Node* n, Node* early, Node* LCA); void dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA);
#endif #endif
#ifndef PRODUCT #ifndef PRODUCT
......
...@@ -493,6 +493,8 @@ Node *Node::clone() const { ...@@ -493,6 +493,8 @@ Node *Node::clone() const {
} }
if (is_macro()) if (is_macro())
compile->add_macro_node(n); compile->add_macro_node(n);
if (is_expensive())
compile->add_expensive_node(n);
n->set_idx(compile->next_unique()); // Get new unique index as well n->set_idx(compile->next_unique()); // Get new unique index as well
debug_only( n->verify_construction() ); debug_only( n->verify_construction() );
...@@ -616,6 +618,9 @@ void Node::destruct() { ...@@ -616,6 +618,9 @@ void Node::destruct() {
if (is_macro()) { if (is_macro()) {
compile->remove_macro_node(this); compile->remove_macro_node(this);
} }
if (is_expensive()) {
compile->remove_expensive_node(this);
}
#ifdef ASSERT #ifdef ASSERT
// We will not actually delete the storage, but we'll make the node unusable. // We will not actually delete the storage, but we'll make the node unusable.
*(address*)this = badAddress; // smash the C++ vtbl, probably *(address*)this = badAddress; // smash the C++ vtbl, probably
...@@ -689,6 +694,13 @@ bool Node::is_dead() const { ...@@ -689,6 +694,13 @@ bool Node::is_dead() const {
} }
#endif #endif
//------------------------------is_unreachable---------------------------------
// Returns true when this node can be treated as dead: it has no uses, the
// type the IGVN holds for it is TOP, or its control input is top.
// A node without a control input (in(0) == NULL) is judged on the first two
// criteria only; in(0) is never dereferenced when it is NULL.
// Must not be called on MachNodes.
bool Node::is_unreachable(PhaseIterGVN &igvn) const {
  assert(!is_Mach(), "doesn't work with MachNodes");
  if (outcnt() == 0 || igvn.type(this) == Type::TOP) {
    return true;
  }
  Node* ctrl = in(0);
  return ctrl != NULL && ctrl->is_top();
}
//------------------------------add_req---------------------------------------- //------------------------------add_req----------------------------------------
// Add a new required input at the end // Add a new required input at the end
void Node::add_req( Node *n ) { void Node::add_req( Node *n ) {
...@@ -1246,6 +1258,9 @@ static void kill_dead_code( Node *dead, PhaseIterGVN *igvn ) { ...@@ -1246,6 +1258,9 @@ static void kill_dead_code( Node *dead, PhaseIterGVN *igvn ) {
if (dead->is_macro()) { if (dead->is_macro()) {
igvn->C->remove_macro_node(dead); igvn->C->remove_macro_node(dead);
} }
if (dead->is_expensive()) {
igvn->C->remove_expensive_node(dead);
}
// Kill all inputs to the dead guy // Kill all inputs to the dead guy
for (uint i=0; i < dead->req(); i++) { for (uint i=0; i < dead->req(); i++) {
Node *n = dead->in(i); // Get input to dead guy Node *n = dead->in(i); // Get input to dead guy
......
...@@ -378,6 +378,8 @@ protected: ...@@ -378,6 +378,8 @@ protected:
bool is_dead() const; bool is_dead() const;
#define is_not_dead(n) ((n) == NULL || !VerifyIterativeGVN || !((n)->is_dead())) #define is_not_dead(n) ((n) == NULL || !VerifyIterativeGVN || !((n)->is_dead()))
#endif #endif
// Check whether node has become unreachable
bool is_unreachable(PhaseIterGVN &igvn) const;
// Set a required input edge, also updates corresponding output edge // Set a required input edge, also updates corresponding output edge
void add_req( Node *n ); // Append a NEW required input void add_req( Node *n ); // Append a NEW required input
...@@ -646,7 +648,8 @@ public: ...@@ -646,7 +648,8 @@ public:
Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1, Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1,
Flag_avoid_back_to_back = Flag_may_be_short_branch << 1, Flag_avoid_back_to_back = Flag_may_be_short_branch << 1,
Flag_has_call = Flag_avoid_back_to_back << 1, Flag_has_call = Flag_avoid_back_to_back << 1,
_max_flags = (Flag_has_call << 1) - 1 // allow flags combination Flag_is_expensive = Flag_has_call << 1,
_max_flags = (Flag_is_expensive << 1) - 1 // allow flags combination
}; };
private: private:
...@@ -819,6 +822,8 @@ public: ...@@ -819,6 +822,8 @@ public:
// The node is a "macro" node which needs to be expanded before matching // The node is a "macro" node which needs to be expanded before matching
bool is_macro() const { return (_flags & Flag_is_macro) != 0; } bool is_macro() const { return (_flags & Flag_is_macro) != 0; }
// The node is expensive: the best control is set during loop opts.
// True only while the node both carries Flag_is_expensive and still has a
// non-NULL control input; once input 0 is cleared this returns false even
// though the flag bit stays set.
bool is_expensive() const { return (_flags & Flag_is_expensive) != 0 && in(0) != NULL; }
//----------------- Optimization //----------------- Optimization
......
...@@ -1203,6 +1203,9 @@ void PhaseIterGVN::remove_globally_dead_node( Node *dead ) { ...@@ -1203,6 +1203,9 @@ void PhaseIterGVN::remove_globally_dead_node( Node *dead ) {
if (dead->is_macro()) { if (dead->is_macro()) {
C->remove_macro_node(dead); C->remove_macro_node(dead);
} }
if (dead->is_expensive()) {
C->remove_expensive_node(dead);
}
if (recurse) { if (recurse) {
continue; continue;
......
...@@ -456,7 +456,10 @@ public: ...@@ -456,7 +456,10 @@ public:
// Exponentiate a double // Exponentiate a double
class ExpDNode : public Node { class ExpDNode : public Node {
public: public:
ExpDNode( Node *c, Node *in1 ) : Node(c, in1) {} ExpDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
init_flags(Flag_is_expensive);
C->add_expensive_node(this);
}
virtual int Opcode() const; virtual int Opcode() const;
const Type *bottom_type() const { return Type::DOUBLE; } const Type *bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; } virtual uint ideal_reg() const { return Op_RegD; }
...@@ -489,7 +492,10 @@ public: ...@@ -489,7 +492,10 @@ public:
// Raise a double to a double power // Raise a double to a double power
class PowDNode : public Node { class PowDNode : public Node {
public: public:
PowDNode(Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {} PowDNode(Compile* C, Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {
init_flags(Flag_is_expensive);
C->add_expensive_node(this);
}
virtual int Opcode() const; virtual int Opcode() const;
const Type *bottom_type() const { return Type::DOUBLE; } const Type *bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; } virtual uint ideal_reg() const { return Op_RegD; }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册