diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp index 6b7fd37919e87054fbe567e4f470e95dd5fcb6a7..0db21631f2ca7b2cbcb48d67bb658dffd0f28e86 100644 --- a/src/share/vm/opto/compile.cpp +++ b/src/share/vm/opto/compile.cpp @@ -441,6 +441,8 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr _orig_pc_slot_offset_in_bytes(0), _node_bundling_limit(0), _node_bundling_base(NULL), + _java_calls(0), + _inner_loops(0), #ifndef PRODUCT _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")), _printer(IdealGraphPrinter::printer()), @@ -711,6 +713,8 @@ Compile::Compile( ciEnv* ci_env, _code_buffer("Compile::Fill_buffer"), _node_bundling_limit(0), _node_bundling_base(NULL), + _java_calls(0), + _inner_loops(0), #ifndef PRODUCT _trace_opto_output(TraceOptoOutput), _printer(NULL), @@ -1851,22 +1855,26 @@ struct Final_Reshape_Counts : public StackObj { int _float_count; // count float ops requiring 24-bit precision int _double_count; // count double ops requiring more precision int _java_call_count; // count non-inlined 'java' calls + int _inner_loop_count; // count loops which need alignment VectorSet _visited; // Visitation flags Node_List _tests; // Set of IfNodes & PCTableNodes Final_Reshape_Counts() : - _call_count(0), _float_count(0), _double_count(0), _java_call_count(0), + _call_count(0), _float_count(0), _double_count(0), + _java_call_count(0), _inner_loop_count(0), _visited( Thread::current()->resource_area() ) { } void inc_call_count () { _call_count ++; } void inc_float_count () { _float_count ++; } void inc_double_count() { _double_count++; } void inc_java_call_count() { _java_call_count++; } + void inc_inner_loop_count() { _inner_loop_count++; } int get_call_count () const { return _call_count ; } int get_float_count () const { return _float_count ; } int get_double_count() const { return _double_count; } int get_java_call_count() const { return _java_call_count; } + int get_inner_loop_count() const { return _inner_loop_count; } }; static bool oop_offset_is_sane(const TypeInstPtr* tp) { @@ -1878,7 +1886,7 @@ static bool oop_offset_is_sane(const TypeInstPtr* tp) { //------------------------------final_graph_reshaping_impl---------------------- // Implement items 1-5 from final_graph_reshaping below. -static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { +static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { if ( n->outcnt() == 0 ) return; // dead node uint nop = n->Opcode(); @@ -1920,13 +1928,13 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { case Op_CmpF: case Op_CmpF3: // case Op_ConvL2F: // longs are split into 32-bit halves - fpu.inc_float_count(); + frc.inc_float_count(); break; case Op_ConvF2D: case Op_ConvD2F: - fpu.inc_float_count(); - fpu.inc_double_count(); + frc.inc_float_count(); + frc.inc_double_count(); break; // Count all double operations that may use FPU @@ -1943,7 +1951,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { case Op_ConD: case Op_CmpD: case Op_CmpD3: - fpu.inc_double_count(); + frc.inc_double_count(); break; case Op_Opaque1: // Remove Opaque Nodes before matching case Op_Opaque2: // Remove Opaque Nodes before matching @@ -1952,7 +1960,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { case Op_CallStaticJava: case Op_CallJava: case Op_CallDynamicJava: - fpu.inc_java_call_count(); // Count java call site; + frc.inc_java_call_count(); // Count java call site; case Op_CallRuntime: case Op_CallLeaf: case Op_CallLeafNoFP: { @@ -1963,7 +1971,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking, // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ... if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) { - fpu.inc_call_count(); // Count the call site + frc.inc_call_count(); // Count the call site } else { // See if uncommon argument is shared Node *n = call->in(TypeFunc::Parms); int nop = n->Opcode(); @@ -1984,11 +1992,11 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { case Op_StoreD: case Op_LoadD: case Op_LoadD_unaligned: - fpu.inc_double_count(); + frc.inc_double_count(); goto handle_mem; case Op_StoreF: case Op_LoadF: - fpu.inc_float_count(); + frc.inc_float_count(); goto handle_mem; case Op_StoreB: @@ -2325,6 +2333,12 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { n->subsume_by(btp); } break; + case Op_Loop: + case Op_CountedLoop: + if (n->as_Loop()->is_inner_loop()) { + frc.inc_inner_loop_count(); + } + break; default: assert( !n->is_Call(), "" ); assert( !n->is_Mem(), "" ); @@ -2333,17 +2347,17 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { // Collect CFG split points if (n->is_MultiBranch()) - fpu._tests.push(n); + frc._tests.push(n); } //------------------------------final_graph_reshaping_walk--------------------- // Replacing Opaque nodes with their input in final_graph_reshaping_impl(), // requires that the walk visits a node's inputs before visiting the node. -static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &fpu ) { +static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) { ResourceArea *area = Thread::current()->resource_area(); Unique_Node_List sfpt(area); - fpu._visited.set(root->_idx); // first, mark node as visited + frc._visited.set(root->_idx); // first, mark node as visited uint cnt = root->req(); Node *n = root; uint i = 0; @@ -2352,7 +2366,7 @@ static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Re // Place all non-visited non-null inputs onto stack Node* m = n->in(i); ++i; - if (m != NULL && !fpu._visited.test_set(m->_idx)) { + if (m != NULL && !frc._visited.test_set(m->_idx)) { if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) sfpt.push(m); cnt = m->req(); @@ -2362,7 +2376,7 @@ static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Re } } else { // Now do post-visit work - final_graph_reshaping_impl( n, fpu ); + final_graph_reshaping_impl( n, frc ); if (nstack.is_empty()) break; // finished n = nstack.node(); // Get node from stack @@ -2443,16 +2457,16 @@ bool Compile::final_graph_reshaping() { return true; } - Final_Reshape_Counts fpu; + Final_Reshape_Counts frc; // Visit everybody reachable! // Allocate stack of size C->unique()/2 to avoid frequent realloc Node_Stack nstack(unique() >> 1); - final_graph_reshaping_walk(nstack, root(), fpu); + final_graph_reshaping_walk(nstack, root(), frc); // Check for unreachable (from below) code (i.e., infinite loops). - for( uint i = 0; i < fpu._tests.size(); i++ ) { - MultiBranchNode *n = fpu._tests[i]->as_MultiBranch(); + for( uint i = 0; i < frc._tests.size(); i++ ) { + MultiBranchNode *n = frc._tests[i]->as_MultiBranch(); // Get number of CFG targets. // Note that PCTables include exception targets after calls. uint required_outcnt = n->required_outcnt(); @@ -2498,7 +2512,7 @@ bool Compile::final_graph_reshaping() { // Check that I actually visited all kids. Unreached kids // must be infinite loops. for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) - if (!fpu._visited.test(n->fast_out(j)->_idx)) { + if (!frc._visited.test(n->fast_out(j)->_idx)) { record_method_not_compilable("infinite loop"); return true; // Found unvisited kid; must be unreach } @@ -2507,13 +2521,14 @@ bool Compile::final_graph_reshaping() { // If original bytecodes contained a mixture of floats and doubles // check if the optimizer has made it homogenous, item (3). if( Use24BitFPMode && Use24BitFP && - fpu.get_float_count() > 32 && - fpu.get_double_count() == 0 && - (10 * fpu.get_call_count() < fpu.get_float_count()) ) { + frc.get_float_count() > 32 && + frc.get_double_count() == 0 && + (10 * frc.get_call_count() < frc.get_float_count()) ) { set_24_bit_selection_and_mode( false, true ); } - set_has_java_calls(fpu.get_java_call_count() > 0); + set_java_calls(frc.get_java_call_count()); + set_inner_loops(frc.get_inner_loop_count()); // No infinite loops, no reason to bail out. return false; diff --git a/src/share/vm/opto/compile.hpp b/src/share/vm/opto/compile.hpp index dcd6813a893f758f53576c935fb1dc3f14bd4e25..bad984ff65d0c5675a72e3ee69380d61e01c0095 100644 --- a/src/share/vm/opto/compile.hpp +++ b/src/share/vm/opto/compile.hpp @@ -223,7 +223,8 @@ class Compile : public Phase { PhaseCFG* _cfg; // Results of CFG finding bool _select_24_bit_instr; // We selected an instruction with a 24-bit result bool _in_24_bit_fp_mode; // We are emitting instructions with 24-bit results - bool _has_java_calls; // True if the method has java calls + int _java_calls; // Number of java calls in the method + int _inner_loops; // Number of inner loops in the method Matcher* _matcher; // Engine to map ideal to machine instructions PhaseRegAlloc* _regalloc; // Results of register allocation. int _frame_slots; // Size of total frame in stack slots @@ -505,7 +506,9 @@ class Compile : public Phase { PhaseCFG* cfg() { return _cfg; } bool select_24_bit_instr() const { return _select_24_bit_instr; } bool in_24_bit_fp_mode() const { return _in_24_bit_fp_mode; } - bool has_java_calls() const { return _has_java_calls; } + bool has_java_calls() const { return _java_calls > 0; } + int java_calls() const { return _java_calls; } + int inner_loops() const { return _inner_loops; } Matcher* matcher() { return _matcher; } PhaseRegAlloc* regalloc() { return _regalloc; } int frame_slots() const { return _frame_slots; } @@ -532,7 +535,8 @@ class Compile : public Phase { _in_24_bit_fp_mode = mode; } - void set_has_java_calls(bool z) { _has_java_calls = z; } + void set_java_calls(int z) { _java_calls = z; } + void set_inner_loops(int z) { _inner_loops = z; } // Instruction bits passed off to the VM int code_size() { return _method_size; } diff --git a/src/share/vm/opto/output.cpp b/src/share/vm/opto/output.cpp index 3c8af5991429c96cbf74d7741e6df28ce4503cb8..1bc5361c7a1acf3e7b1f6766c8f51a78095b44e6 100644 --- a/src/share/vm/opto/output.cpp +++ b/src/share/vm/opto/output.cpp @@ -50,6 +50,13 @@ void Compile::Output() { init_scratch_buffer_blob(); if (failing()) return; // Out of memory + // The number of new nodes (mostly MachNop) is proportional to + // the number of java calls and inner loops which are aligned. + if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 + + C->inner_loops()*(OptoLoopAlignment-1)), + "out of nodes before code generation" ) ) { + return; + } // Make sure I can find the Start Node Block_Array& bbs = _cfg->_bbs; Block *entry = _cfg->_blocks[1]; @@ -1105,7 +1112,7 @@ void Compile::Fill_buffer() { uint *call_returns = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1); uint return_offset = 0; - MachNode *nop = new (this) MachNopNode(); + int nop_size = (new (this) MachNopNode())->size(_regalloc); int previous_offset = 0; int current_offset = 0; @@ -1188,7 +1195,6 @@ void Compile::Fill_buffer() { } // align the instruction if necessary - int nop_size = nop->size(_regalloc); int padding = mach->compute_padding(current_offset); // Make sure safepoint node for polling is distinct from a call's // return by adding a nop if needed. @@ -1372,7 +1378,6 @@ void Compile::Fill_buffer() { // If the next block is the top of a loop, pad this block out to align // the loop top a little. Helps prevent pipe stalls at loop back branches. - int nop_size = (new (this) MachNopNode())->size(_regalloc); if( i<_cfg->_num_blocks-1 ) { Block *nb = _cfg->_blocks[i+1]; uint padding = nb->alignment_padding(current_offset);