提交 4892ef3c 编写于 作者: G goetz

8029015: PPC64 (part 216): opto: trap based null and range checks

Summary: On PPC64 use tdi instruction that does a compare and raises SIGTRAP for NULL and range checks.
Reviewed-by: kvn
上级 4fec4ec7
......@@ -38,6 +38,7 @@ define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window ma
define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks.
define_pd_global(bool, TrapBasedNullChecks, true);
define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast.
// Use large code-entry alignment.
......@@ -100,12 +101,6 @@ define_pd_global(uintx, TypeProfileLevel, 0);
product(bool, TrapBasedNotEntrantChecks, true, \
"Raise and handle SIGTRAP if calling not entrant or zombie" \
" method.") \
product(bool, TrapBasedNullChecks, true, \
"Generate code for null checks that uses a cmp and trap " \
"instruction raising SIGTRAP. This is only used if an access to " \
"null (+offset) will not raise a SIGSEGV.") \
product(bool, TrapBasedRangeChecks, true, \
"Raise and handle SIGTRAP if array out of bounds check fails.") \
product(bool, TraceTraps, false, "Trace all traps the signal handler " \
"handles.") \
\
......
......@@ -76,11 +76,13 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
}
static bool is_sigill_zombie_not_entrant_at(address addr);
#ifdef COMPILER2
// SIGTRAP-based implicit range checks.
// Reports whether MacroAssembler::is_trap_range_check() recognizes the word
// obtained from long_at(0) as a range-check trap.
// Precondition (asserted): UseSIGTRAP and TrapBasedRangeChecks are both set.
// NOTE(review): long_at(0) presumably reads the instruction word at this
// NativeInstruction's own address -- confirm against its definition.
bool is_sigtrap_range_check() {
assert(UseSIGTRAP && TrapBasedRangeChecks, "precondition");
return MacroAssembler::is_trap_range_check(long_at(0));
}
#endif
// 'should not reach here'.
bool is_sigtrap_should_not_reach_here() {
......
......@@ -77,14 +77,17 @@ void VM_Version::initialize() {
MSG(TrapBasedICMissChecks);
MSG(TrapBasedNotEntrantChecks);
MSG(TrapBasedNullChecks);
MSG(TrapBasedRangeChecks);
FLAG_SET_ERGO(bool, TrapBasedNotEntrantChecks, false);
FLAG_SET_ERGO(bool, TrapBasedNullChecks, false);
FLAG_SET_ERGO(bool, TrapBasedICMissChecks, false);
FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false);
}
#ifdef COMPILER2
if (!UseSIGTRAP) {
MSG(TrapBasedRangeChecks);
FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false);
}
// On Power6 test for section size.
if (PowerArchitecturePPC64 == 6)
determine_section_size();
......
......@@ -90,6 +90,8 @@ define_pd_global(uint64_t,MaxRAM, 4ULL*G);
define_pd_global(uintx, CodeCacheMinBlockLength, 4);
define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on sparc.
// Heap related flags
define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M));
......
......@@ -43,7 +43,8 @@ define_pd_global(bool, CountInterpCalls, false); // not implemented i
define_pd_global(bool, NeedsDeoptSuspend, true); // register window machines need this
define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks
define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast
define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on sparc.
define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast
define_pd_global(intx, CodeEntryAlignment, 32);
// The default setting 16/16 seems to work best.
......
......@@ -88,6 +88,8 @@ define_pd_global(intx, ReservedCodeCacheSize, 48*M);
define_pd_global(uintx, CodeCacheMinBlockLength, 4);
define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on x86.
// Heap related flags
define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M));
......
......@@ -37,7 +37,8 @@ define_pd_global(bool, CountInterpCalls, true);
define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this
define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks
define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast
define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86.
define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast
// See 4827828 for this change. There is no globals_core_i486.hpp. I can't
// assign a different value for C2 without touching a number of files. Use
......
......@@ -38,6 +38,7 @@ define_pd_global(bool, CountInterpCalls, true);
define_pd_global(bool, NeedsDeoptSuspend, false);
define_pd_global(bool, ImplicitNullChecks, true);
define_pd_global(bool, TrapBasedNullChecks, false);
define_pd_global(bool, UncommonNullCast, true);
define_pd_global(intx, CodeEntryAlignment, 32);
......
......@@ -272,6 +272,7 @@ int main(int argc, char *argv[])
AD.addInclude(AD._CPP_PIPELINE_file, "adfiles", get_basename(AD._HPP_file._name));
AD.addInclude(AD._DFA_file, "precompiled.hpp");
AD.addInclude(AD._DFA_file, "adfiles", get_basename(AD._HPP_file._name));
AD.addInclude(AD._DFA_file, "opto/cfgnode.hpp"); // Use PROB_MAX in predicate.
AD.addInclude(AD._DFA_file, "opto/matcher.hpp");
AD.addInclude(AD._DFA_file, "opto/opcodes.hpp");
// Make sure each .cpp file starts with include lines:
......
......@@ -1615,15 +1615,19 @@ void ArchDesc::declareClasses(FILE *fp) {
Attribute *attr = instr->_attribs;
bool avoid_back_to_back = false;
while (attr != NULL) {
if (strcmp (attr->_ident,"ins_cost") &&
strncmp(attr->_ident,"ins_field_", 10) != 0 &&
strcmp (attr->_ident,"ins_short_branch")) {
fprintf(fp," virtual int %s() const { return %s; }\n",
attr->_ident, attr->_val);
if (strcmp (attr->_ident, "ins_cost") != 0 &&
strncmp(attr->_ident, "ins_field_", 10) != 0 &&
// Must match function in node.hpp: return type bool, no prefix "ins_".
strcmp (attr->_ident, "ins_is_TrapBasedCheckNode") != 0 &&
strcmp (attr->_ident, "ins_short_branch") != 0) {
fprintf(fp, " virtual int %s() const { return %s; }\n", attr->_ident, attr->_val);
}
// Check value for ins_avoid_back_to_back, and if it is true (1), set the flag
if (!strcmp(attr->_ident,"ins_avoid_back_to_back") && attr->int_val(*this) != 0)
if (!strcmp(attr->_ident, "ins_avoid_back_to_back") != 0 && attr->int_val(*this) != 0)
avoid_back_to_back = true;
if (strcmp (attr->_ident, "ins_is_TrapBasedCheckNode") == 0)
fprintf(fp, " virtual bool is_TrapBasedCheckNode() const { return %s; }\n", attr->_val);
attr = (Attribute *)attr->_next;
}
......
......@@ -530,19 +530,28 @@ void PhaseCFG::insert_goto_at(uint block_no, uint succ_no) {
// Does this block end in a multiway branch that cannot have the default case
// flipped for another case?
static bool no_flip_branch( Block *b ) {
static bool no_flip_branch(Block *b) {
int branch_idx = b->number_of_nodes() - b->_num_succs-1;
if( branch_idx < 1 ) return false;
Node *bra = b->get_node(branch_idx);
if( bra->is_Catch() )
if (branch_idx < 1) {
return false;
}
Node *branch = b->get_node(branch_idx);
if (branch->is_Catch()) {
return true;
if( bra->is_Mach() ) {
if( bra->is_MachNullCheck() )
}
if (branch->is_Mach()) {
if (branch->is_MachNullCheck()) {
return true;
}
int iop = branch->as_Mach()->ideal_Opcode();
if (iop == Op_FastLock || iop == Op_FastUnlock) {
return true;
int iop = bra->as_Mach()->ideal_Opcode();
if( iop == Op_FastLock || iop == Op_FastUnlock )
}
// Don't flip if branch has an implicit check.
if (branch->as_Mach()->is_TrapBasedCheckNode()) {
return true;
}
}
return false;
}
......@@ -700,6 +709,57 @@ void PhaseCFG::remove_empty_blocks() {
} // End of for all blocks
}
// Lay out the control flow of a block that ends in a trap based check.
//
// The check is a two-way MachIf. After this call the block's successor 0 is
// the trap target and successor 1 is the fall through target; the If is
// negated when needed so that its probability is at most 2*PROB_NEVER.
// The fall through block is moved directly behind this block if
// move_to_next() manages to do so, otherwise a goto is inserted.
//
//   branch     - the terminating branch of 'block' (only inspected by an assert)
//   block      - the block ending in the trap based check
//   block_pos  - position of 'block', handed to move_to_next()/insert_goto_at()
//   bnext      - the block currently following 'block'
// Returns the block that follows 'block' once the fixup is done.
Block *PhaseCFG::fixup_trap_based_check(Node *branch, Block *block, int block_pos, Block *bnext) {
  assert(branch->is_MachIf(), "must be If");
  assert(block->_num_succs == 2, "must have 2 successors");

  // The last three nodes of the block are the If and its two projections.
  const uint node_cnt = block->number_of_nodes();
  MachIfNode *iff   = block->get_node(node_cnt - 3)->as_MachIf();
  ProjNode   *proj0 = block->get_node(node_cnt - 2)->as_Proj();
  ProjNode   *proj1 = block->get_node(node_cnt - 1)->as_Proj();

  // Classify the projections by opcode; proj1 is the default for both roles.
  ProjNode *true_proj  = proj1;
  ProjNode *false_proj = proj1;
  switch (proj0->Opcode()) {
    case Op_IfTrue:
      true_proj = proj0;
      break;
    case Op_IfFalse:
      false_proj = proj0;
      break;
    default:
      break;
  }

  // Projections and successors are expected to be listed in the same order.
  assert(proj0->raw_out(0) == block->_succs[0]->head(), "Mismatch successor 0");
  assert(proj1->raw_out(0) == block->_succs[1]->head(), "Mismatch successor 1");

  // The trap has to sit on the TRUE path of the branch. If the TRUE path is
  // not the (practically) never taken one, negate the branch so that the old
  // FALSE target becomes the new TRUE target.
  const bool true_path_is_never = (iff->_prob <= 2*PROB_NEVER); // There are small rounding errors.
  ProjNode *trap_proj;
  ProjNode *fall_through_proj;
  if (true_path_is_never) {
    trap_proj         = true_proj;
    fall_through_proj = false_proj;
  } else {
    trap_proj         = false_proj;
    fall_through_proj = true_proj;
    iff->negate();
  }
  assert(iff->_prob <= 2*PROB_NEVER, "Trap based checks are expected to trap never!");

  // Successor 0 is the target of the trap, successor 1 the fall through target.
  block->_succs.map(0, get_block_for_node(trap_proj->raw_out(0)));
  block->_succs.map(1, get_block_for_node(fall_through_proj->raw_out(0)));

  // Try to get the fall through block placed right behind this block.
  Block *fall_through = block->non_connector_successor(1);
  if (fall_through == bnext) {
    return bnext;
  }
  if (move_to_next(fall_through, block_pos)) {
    return fall_through;
  }

  // The fall through block could not be made the next block: jump to it.
  insert_goto_at(block_pos, 1);
  return bnext;
}
// Fix up the final control flow for basic blocks.
void PhaseCFG::fixup_flow() {
// Fixup final control flow for the blocks. Remove jump-to-next
......@@ -723,8 +783,21 @@ void PhaseCFG::fixup_flow() {
// Check for multi-way branches where I cannot negate the test to
// exchange the true and false targets.
if (no_flip_branch(block)) {
// Find fall through case - if must fall into its target
// Find fall through case - if must fall into its target.
// Get the index of the branch's first successor.
int branch_idx = block->number_of_nodes() - block->_num_succs;
// The branch is 1 before the branch's first successor.
Node *branch = block->get_node(branch_idx-1);
// Handle no-flip branches which have implicit checks and which require
// special block ordering and individual semantics of the 'fall through
// case'.
if ((TrapBasedNullChecks || TrapBasedRangeChecks) &&
branch->is_Mach() && branch->as_Mach()->is_TrapBasedCheckNode()) {
bnext = fixup_trap_based_check(branch, block, i, bnext);
} else {
// Else, default handling for no-flip branches
for (uint j2 = 0; j2 < block->_num_succs; j2++) {
const ProjNode* p = block->get_node(branch_idx + j2)->as_Proj();
if (p->_con == 0) {
......@@ -744,6 +817,7 @@ void PhaseCFG::fixup_flow() {
break;
}
}
}
// Remove all CatchProjs
for (uint j = 0; j < block->_num_succs; j++) {
......
......@@ -590,6 +590,7 @@ class PhaseCFG : public Phase {
// Remove empty basic blocks
void remove_empty_blocks();
Block *fixup_trap_based_check(Node *branch, Block *block, int block_pos, Block *bnext);
void fixup_flow();
// Insert a node into a block at index and map the node to the block
......
......@@ -654,7 +654,11 @@
"Propagate type improvements in callers of inlinee if possible") \
\
experimental(bool, UseTypeSpeculation, false, \
"Speculatively propagate types from profiles")
"Speculatively propagate types from profiles") \
\
product_pd(bool, TrapBasedRangeChecks, \
"Generate code for range checks that uses a cmp and trap " \
"instruction raising SIGTRAP. Used on PPC64.") \
C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
......
......@@ -860,6 +860,10 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);
set_fixed_slots(next_slot);
// Compute when to use implicit null checks. Used by matching trap based
// nodes and NullCheck optimization.
set_allowed_deopt_reasons();
// Now generate code
Code_Gen();
if (failing()) return;
......@@ -948,7 +952,8 @@ Compile::Compile( ciEnv* ci_env,
_inlining_incrementally(false),
_print_inlining_list(NULL),
_print_inlining_idx(0),
_preserve_jvm_state(0) {
_preserve_jvm_state(0),
_allowed_reasons(0) {
C = this;
#ifndef PRODUCT
......@@ -3350,6 +3355,19 @@ bool Compile::too_many_recompiles(ciMethod* method,
}
}
// Recompute _allowed_reasons, the bitset of deoptimization reasons for which
// too_many_traps() does not report too many traps. Used by matching trap
// based nodes and by the NullCheck optimization.
// For anything but a method compilation the set is left empty.
void Compile::set_allowed_deopt_reasons() {
  _allowed_reasons = 0;
  if (!is_method_compilation()) {
    return;
  }

  const int first_reason = (int)Deoptimization::Reason_none + 1;
  for (int reason = first_reason; reason < Compile::trapHistLength; reason++) {
    // Every reason needs its own bit in the int-sized set.
    assert(reason < BitsPerInt, "recode bit map");
    if (too_many_traps((Deoptimization::DeoptReason) reason)) {
      continue;
    }
    _allowed_reasons |= nth_bit(reason);
  }
}
#ifndef PRODUCT
//------------------------------verify_graph_edges---------------------------
......
......@@ -855,6 +855,11 @@ class Compile : public Phase {
ciMethodData* logmd = NULL);
// Report if there were too many recompiles at a method and bci.
bool too_many_recompiles(ciMethod* method, int bci, Deoptimization::DeoptReason reason);
// Return a bitset with the reasons where deoptimization is allowed,
// i.e., where there were not too many uncommon traps.
int _allowed_reasons;
int allowed_deopt_reasons() { return _allowed_reasons; }
void set_allowed_deopt_reasons();
// Parsing, optimization
PhaseGVN* initial_gvn() { return _initial_gvn; }
......
......@@ -1330,15 +1330,6 @@ void PhaseCFG::global_code_motion() {
// with suitable memory ops nearby. Use the memory op to do the NULL check.
// I can generate a memory op if there is not one nearby.
if (C->is_method_compilation()) {
// Don't do it for natives, adapters, or runtime stubs
int allowed_reasons = 0;
// ...and don't do it when there have been too many traps, globally.
for (int reason = (int)Deoptimization::Reason_none+1;
reason < Compile::trapHistLength; reason++) {
assert(reason < BitsPerInt, "recode bit map");
if (!C->too_many_traps((Deoptimization::DeoptReason) reason))
allowed_reasons |= nth_bit(reason);
}
// By reversing the loop direction we get a very minor gain on mpegaudio.
// Feel free to revert to a forward loop for clarity.
// for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
......@@ -1346,7 +1337,7 @@ void PhaseCFG::global_code_motion() {
Node* proj = _matcher._null_check_tests[i];
Node* val = _matcher._null_check_tests[i + 1];
Block* block = get_block_for_node(proj);
implicit_null_check(block, proj, val, allowed_reasons);
implicit_null_check(block, proj, val, C->allowed_deopt_reasons());
// The implicit_null_check will only perform the transformation
// if the null branch is truly uncommon, *and* it leads to an
// uncommon trap. Combined with the too_many_traps guards
......
......@@ -315,6 +315,9 @@ public:
static const Pipeline *pipeline_class();
virtual const Pipeline *pipeline() const;
// Returns true if this node is a check that can be implemented with a trap.
// This default answers false. ADLC emits an overriding definition for
// instructions that carry the ins_is_TrapBasedCheckNode attribute, returning
// that attribute's value.
virtual bool is_TrapBasedCheckNode() const { return false; }
#ifndef PRODUCT
virtual const char *Name() const = 0; // Machine-specific name
virtual void dump_spec(outputStream *st) const; // Print per-node info
......
......@@ -2395,6 +2395,69 @@ bool Matcher::post_store_load_barrier(const Node* vmb) {
return false;
}
// Check whether node n is a branch to an uncommon trap that we could
// optimize as test with very high branch costs in case of going to
// the uncommon trap. The code must be able to be recompiled to use
// a cheaper test.
bool Matcher::branches_to_uncommon_trap(const Node *n) {
// Don't do it for natives, adapters, or runtime stubs
Compile *C = Compile::current();
if (!C->is_method_compilation()) return false;
assert(n->is_If(), "You should only call this on if nodes.");
IfNode *ifn = n->as_If();
Node *ifFalse = NULL;
for (DUIterator_Fast imax, i = ifn->fast_outs(imax); i < imax; i++) {
if (ifn->fast_out(i)->is_IfFalse()) {
ifFalse = ifn->fast_out(i);
break;
}
}
assert(ifFalse, "An If should have an ifFalse. Graph is broken.");
Node *reg = ifFalse;
int cnt = 4; // We must protect against cycles. Limit to 4 iterations.
// Alternatively use visited set? Seems too expensive.
while (reg != NULL && cnt > 0) {
CallNode *call = NULL;
RegionNode *nxt_reg = NULL;
for (DUIterator_Fast imax, i = reg->fast_outs(imax); i < imax; i++) {
Node *o = reg->fast_out(i);
if (o->is_Call()) {
call = o->as_Call();
}
if (o->is_Region()) {
nxt_reg = o->as_Region();
}
}
if (call &&
call->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) {
const Type* trtype = call->in(TypeFunc::Parms)->bottom_type();
if (trtype->isa_int() && trtype->is_int()->is_con()) {
jint tr_con = trtype->is_int()->get_con();
Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
assert((int)reason < (int)BitsPerInt, "recode bit map");
if (is_set_nth_bit(C->allowed_deopt_reasons(), (int)reason)
&& action != Deoptimization::Action_none) {
// This uncommon trap is sure to recompile, eventually.
// When that happens, C->too_many_traps will prevent
// this transformation from happening again.
return true;
}
}
}
reg = nxt_reg;
cnt--;
}
return false;
}
//=============================================================================
//---------------------------State---------------------------------------------
State::State(void) {
......
......@@ -485,6 +485,8 @@ public:
// retain the Node to act as a compiler ordering barrier.
static bool post_store_load_barrier(const Node* mb);
// Does n lead to an uncommon trap that can cause deoptimization?
static bool branches_to_uncommon_trap(const Node *n);
#ifdef ASSERT
void dump_old2new_map(); // machine-independent to machine-dependent
......
......@@ -1459,6 +1459,12 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
// Intel all the time, with add-to-memory kind of opcodes.
previous_offset = current_offset;
}
// Not an else-if!
// If this is a trap based cmp then add its offset to the list.
if (mach->is_TrapBasedCheckNode()) {
inct_starts[inct_cnt++] = current_offset;
}
}
// Verify that there is sufficient space remaining
......@@ -1725,6 +1731,12 @@ void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_start
_inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos());
continue;
}
// Handle implicit exception table updates: trap instructions.
if (n->is_Mach() && n->as_Mach()->is_TrapBasedCheckNode()) {
uint block_num = block->non_connector_successor(0)->_pre_order;
_inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos());
continue;
}
} // End of for all blocks fill in exception table entries
}
......
......@@ -2500,6 +2500,12 @@ class CommandLineFlags {
develop_pd(bool, ImplicitNullChecks, \
"Generate code for implicit null checks") \
\
product_pd(bool, TrapBasedNullChecks, \
"Generate code for null checks that uses a cmp and trap " \
"instruction raising SIGTRAP. This is only used if an access to " \
"null (+offset) will not raise a SIGSEGV, i.e., " \
"ImplicitNullChecks don't work (PPC64).") \
\
product(bool, PrintSafepointStatistics, false, \
"Print statistics about safepoint synchronization") \
\
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册