提交 76600cfa 编写于 作者: J jrose

6912064: type profiles need to be exploited more for dynamic language support

Reviewed-by: kvn
上级 5f85a401
...@@ -504,6 +504,7 @@ graphKit.hpp addnode.hpp ...@@ -504,6 +504,7 @@ graphKit.hpp addnode.hpp
graphKit.hpp callnode.hpp graphKit.hpp callnode.hpp
graphKit.hpp cfgnode.hpp graphKit.hpp cfgnode.hpp
graphKit.hpp ciEnv.hpp graphKit.hpp ciEnv.hpp
graphKit.hpp ciMethodData.hpp
graphKit.hpp divnode.hpp graphKit.hpp divnode.hpp
graphKit.hpp compile.hpp graphKit.hpp compile.hpp
graphKit.hpp deoptimization.hpp graphKit.hpp deoptimization.hpp
......
...@@ -2451,11 +2451,79 @@ Node* GraphKit::type_check_receiver(Node* receiver, ciKlass* klass, ...@@ -2451,11 +2451,79 @@ Node* GraphKit::type_check_receiver(Node* receiver, ciKlass* klass,
} }
//------------------------------seems_never_null-------------------------------
// Use null_seen information if it is available from the profile.
// If we see an unexpected null at a type check we record it and force a
// recompile; the offending check will be recompiled to handle NULLs.
// If we see several offending BCIs, then all checks in the
// method will be recompiled.
bool GraphKit::seems_never_null(Node* obj, ciProfileData* data) {
if (UncommonNullCast // Cutout for this technique
&& obj != null() // And not the -Xcomp stupid case?
&& !too_many_traps(Deoptimization::Reason_null_check)
) {
if (data == NULL)
// Edge case: no mature data. Be optimistic here.
return true;
// If the profile has not seen a null, assume it won't happen.
assert(java_bc() == Bytecodes::_checkcast ||
java_bc() == Bytecodes::_instanceof ||
java_bc() == Bytecodes::_aastore, "MDO must collect null_seen bit here");
return !data->as_BitData()->null_seen();
}
return false;
}
//------------------------maybe_cast_profiled_receiver-------------------------
// If the profile has seen exactly one type, narrow to exactly that type.
// Subsequent type checks will always fold up.
Node* GraphKit::maybe_cast_profiled_receiver(Node* not_null_obj,
ciProfileData* data,
ciKlass* require_klass) {
if (!UseTypeProfile || !TypeProfileCasts) return NULL;
if (data == NULL) return NULL;
// Make sure we haven't already deoptimized from this tactic.
if (too_many_traps(Deoptimization::Reason_class_check))
return NULL;
// (No, this isn't a call, but it's enough like a virtual call
// to use the same ciMethod accessor to get the profile info...)
ciCallProfile profile = method()->call_profile_at_bci(bci());
if (profile.count() >= 0 && // no cast failures here
profile.has_receiver(0) &&
profile.morphism() == 1) {
ciKlass* exact_kls = profile.receiver(0);
if (require_klass == NULL ||
static_subtype_check(require_klass, exact_kls) == SSC_always_true) {
// If we narrow the type to match what the type profile sees,
// we can then remove the rest of the cast.
// This is a win, even if the exact_kls is very specific,
// because downstream operations, such as method calls,
// will often benefit from the sharper type.
Node* exact_obj = not_null_obj; // will get updated in place...
Node* slow_ctl = type_check_receiver(exact_obj, exact_kls, 1.0,
&exact_obj);
{ PreserveJVMState pjvms(this);
set_control(slow_ctl);
uncommon_trap(Deoptimization::Reason_class_check,
Deoptimization::Action_maybe_recompile);
}
replace_in_map(not_null_obj, exact_obj);
return exact_obj;
}
// assert(ssc == SSC_always_true)... except maybe the profile lied to us.
}
return NULL;
}
//-------------------------------gen_instanceof-------------------------------- //-------------------------------gen_instanceof--------------------------------
// Generate an instance-of idiom. Used by both the instance-of bytecode // Generate an instance-of idiom. Used by both the instance-of bytecode
// and the reflective instance-of call. // and the reflective instance-of call.
Node* GraphKit::gen_instanceof( Node *subobj, Node* superklass ) { Node* GraphKit::gen_instanceof(Node* obj, Node* superklass) {
C->set_has_split_ifs(true); // Has chance for split-if optimization kill_dead_locals(); // Benefit all the uncommon traps
assert( !stopped(), "dead parse path should be checked in callers" ); assert( !stopped(), "dead parse path should be checked in callers" );
assert(!TypePtr::NULL_PTR->higher_equal(_gvn.type(superklass)->is_klassptr()), assert(!TypePtr::NULL_PTR->higher_equal(_gvn.type(superklass)->is_klassptr()),
"must check for not-null not-dead klass in callers"); "must check for not-null not-dead klass in callers");
...@@ -2466,9 +2534,16 @@ Node* GraphKit::gen_instanceof( Node *subobj, Node* superklass ) { ...@@ -2466,9 +2534,16 @@ Node* GraphKit::gen_instanceof( Node *subobj, Node* superklass ) {
Node* phi = new(C, PATH_LIMIT) PhiNode(region, TypeInt::BOOL); Node* phi = new(C, PATH_LIMIT) PhiNode(region, TypeInt::BOOL);
C->set_has_split_ifs(true); // Has chance for split-if optimization C->set_has_split_ifs(true); // Has chance for split-if optimization
ciProfileData* data = NULL;
if (java_bc() == Bytecodes::_instanceof) { // Only for the bytecode
data = method()->method_data()->bci_to_data(bci());
}
bool never_see_null = (ProfileDynamicTypes // aggressive use of profile
&& seems_never_null(obj, data));
// Null check; get casted pointer; set region slot 3 // Null check; get casted pointer; set region slot 3
Node* null_ctl = top(); Node* null_ctl = top();
Node* not_null_obj = null_check_oop(subobj, &null_ctl); Node* not_null_obj = null_check_oop(obj, &null_ctl, never_see_null);
// If not_null_obj is dead, only null-path is taken // If not_null_obj is dead, only null-path is taken
if (stopped()) { // Doing instance-of on a NULL? if (stopped()) { // Doing instance-of on a NULL?
...@@ -2477,6 +2552,23 @@ Node* GraphKit::gen_instanceof( Node *subobj, Node* superklass ) { ...@@ -2477,6 +2552,23 @@ Node* GraphKit::gen_instanceof( Node *subobj, Node* superklass ) {
} }
region->init_req(_null_path, null_ctl); region->init_req(_null_path, null_ctl);
phi ->init_req(_null_path, intcon(0)); // Set null path value phi ->init_req(_null_path, intcon(0)); // Set null path value
if (null_ctl == top()) {
// Do this eagerly, so that pattern matches like is_diamond_phi
// will work even during parsing.
assert(_null_path == PATH_LIMIT-1, "delete last");
region->del_req(_null_path);
phi ->del_req(_null_path);
}
if (ProfileDynamicTypes && data != NULL) {
Node* cast_obj = maybe_cast_profiled_receiver(not_null_obj, data, NULL);
if (stopped()) { // Profile disagrees with this path.
set_control(null_ctl); // Null is the only remaining possibility.
return intcon(0);
}
if (cast_obj != NULL)
not_null_obj = cast_obj;
}
// Load the object's klass // Load the object's klass
Node* obj_klass = load_object_klass(not_null_obj); Node* obj_klass = load_object_klass(not_null_obj);
...@@ -2546,20 +2638,8 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass, ...@@ -2546,20 +2638,8 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
C->set_has_split_ifs(true); // Has chance for split-if optimization C->set_has_split_ifs(true); // Has chance for split-if optimization
// Use null-cast information if it is available // Use null-cast information if it is available
bool never_see_null = false; bool never_see_null = ((failure_control == NULL) // regular case only
// If we see an unexpected null at a check-cast we record it and force a && seems_never_null(obj, data));
// recompile; the offending check-cast will be compiled to handle NULLs.
// If we see several offending BCIs, then all checkcasts in the
// method will be compiled to handle NULLs.
if (UncommonNullCast // Cutout for this technique
&& failure_control == NULL // regular case
&& obj != null() // And not the -Xcomp stupid case?
&& !too_many_traps(Deoptimization::Reason_null_check)) {
// Finally, check the "null_seen" bit from the interpreter.
if (data == NULL || !data->as_BitData()->null_seen()) {
never_see_null = true;
}
}
// Null check; get casted pointer; set region slot 3 // Null check; get casted pointer; set region slot 3
Node* null_ctl = top(); Node* null_ctl = top();
...@@ -2572,47 +2652,26 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass, ...@@ -2572,47 +2652,26 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
} }
region->init_req(_null_path, null_ctl); region->init_req(_null_path, null_ctl);
phi ->init_req(_null_path, null()); // Set null path value phi ->init_req(_null_path, null()); // Set null path value
if (null_ctl == top()) {
// Do this eagerly, so that pattern matches like is_diamond_phi
// will work even during parsing.
assert(_null_path == PATH_LIMIT-1, "delete last");
region->del_req(_null_path);
phi ->del_req(_null_path);
}
Node* cast_obj = NULL; // the casted version of the object Node* cast_obj = NULL;
if (data != NULL &&
// If the profile has seen exactly one type, narrow to that type.
// (The subsequent subtype check will always fold up.)
if (UseTypeProfile && TypeProfileCasts && data != NULL &&
// Counter has never been decremented (due to cast failure). // Counter has never been decremented (due to cast failure).
// ...This is a reasonable thing to expect. It is true of // ...This is a reasonable thing to expect. It is true of
// all casts inserted by javac to implement generic types. // all casts inserted by javac to implement generic types.
data->as_CounterData()->count() >= 0 && data->as_CounterData()->count() >= 0) {
!too_many_traps(Deoptimization::Reason_class_check)) { cast_obj = maybe_cast_profiled_receiver(not_null_obj, data, tk->klass());
// (No, this isn't a call, but it's enough like a virtual call if (cast_obj != NULL) {
// to use the same ciMethod accessor to get the profile info...) if (failure_control != NULL) // failure is now impossible
ciCallProfile profile = method()->call_profile_at_bci(bci()); (*failure_control) = top();
if (profile.count() >= 0 && // no cast failures here // adjust the type of the phi to the exact klass:
profile.has_receiver(0) && phi->raise_bottom_type(_gvn.type(cast_obj)->meet(TypePtr::NULL_PTR));
profile.morphism() == 1) {
ciKlass* exact_kls = profile.receiver(0);
int ssc = static_subtype_check(tk->klass(), exact_kls);
if (ssc == SSC_always_true) {
// If we narrow the type to match what the type profile sees,
// we can then remove the rest of the cast.
// This is a win, even if the exact_kls is very specific,
// because downstream operations, such as method calls,
// will often benefit from the sharper type.
Node* exact_obj = not_null_obj; // will get updated in place...
Node* slow_ctl = type_check_receiver(exact_obj, exact_kls, 1.0,
&exact_obj);
{ PreserveJVMState pjvms(this);
set_control(slow_ctl);
uncommon_trap(Deoptimization::Reason_class_check,
Deoptimization::Action_maybe_recompile);
}
if (failure_control != NULL) // failure is now impossible
(*failure_control) = top();
replace_in_map(not_null_obj, exact_obj);
// adjust the type of the phi to the exact klass:
phi->raise_bottom_type(_gvn.type(exact_obj)->meet(TypePtr::NULL_PTR));
cast_obj = exact_obj;
}
// assert(cast_obj != NULL)... except maybe the profile lied to us.
} }
} }
......
...@@ -341,6 +341,14 @@ class GraphKit : public Phase { ...@@ -341,6 +341,14 @@ class GraphKit : public Phase {
Node* null_check_oop(Node* value, Node* *null_control, Node* null_check_oop(Node* value, Node* *null_control,
bool never_see_null = false); bool never_see_null = false);
// Check the null_seen bit.
bool seems_never_null(Node* obj, ciProfileData* data);
// Use the type profile to narrow an object type.
Node* maybe_cast_profiled_receiver(Node* not_null_obj,
ciProfileData* data,
ciKlass* require_klass);
// Cast obj to not-null on this path // Cast obj to not-null on this path
Node* cast_not_null(Node* obj, bool do_replace_in_map = true); Node* cast_not_null(Node* obj, bool do_replace_in_map = true);
// Replace all occurrences of one node by another. // Replace all occurrences of one node by another.
......
...@@ -906,7 +906,8 @@ bool LibraryCallKit::inline_string_equals() { ...@@ -906,7 +906,8 @@ bool LibraryCallKit::inline_string_equals() {
const int count_offset = java_lang_String::count_offset_in_bytes(); const int count_offset = java_lang_String::count_offset_in_bytes();
const int offset_offset = java_lang_String::offset_offset_in_bytes(); const int offset_offset = java_lang_String::offset_offset_in_bytes();
_sp += 2; int nargs = 2;
_sp += nargs;
Node* argument = pop(); // pop non-receiver first: it was pushed second Node* argument = pop(); // pop non-receiver first: it was pushed second
Node* receiver = pop(); Node* receiver = pop();
...@@ -914,11 +915,11 @@ bool LibraryCallKit::inline_string_equals() { ...@@ -914,11 +915,11 @@ bool LibraryCallKit::inline_string_equals() {
// null check technically happens in the wrong place, which can lead to // null check technically happens in the wrong place, which can lead to
// invalid stack traces when string compare is inlined into a method // invalid stack traces when string compare is inlined into a method
// which handles NullPointerExceptions. // which handles NullPointerExceptions.
_sp += 2; _sp += nargs;
receiver = do_null_check(receiver, T_OBJECT); receiver = do_null_check(receiver, T_OBJECT);
//should not do null check for argument for String.equals(), because spec //should not do null check for argument for String.equals(), because spec
//allows to specify NULL as argument. //allows to specify NULL as argument.
_sp -= 2; _sp -= nargs;
if (stopped()) { if (stopped()) {
return true; return true;
...@@ -943,7 +944,9 @@ bool LibraryCallKit::inline_string_equals() { ...@@ -943,7 +944,9 @@ bool LibraryCallKit::inline_string_equals() {
ciInstanceKlass* klass = env()->String_klass(); ciInstanceKlass* klass = env()->String_klass();
if (!stopped()) { if (!stopped()) {
_sp += nargs; // gen_instanceof might do an uncommon trap
Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass))); Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass)));
_sp -= nargs;
Node* cmp = _gvn.transform(new (C, 3) CmpINode(inst, intcon(1))); Node* cmp = _gvn.transform(new (C, 3) CmpINode(inst, intcon(1)));
Node* bol = _gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::ne)); Node* bol = _gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::ne));
...@@ -2935,7 +2938,9 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) { ...@@ -2935,7 +2938,9 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) {
switch (id) { switch (id) {
case vmIntrinsics::_isInstance: case vmIntrinsics::_isInstance:
// nothing is an instance of a primitive type // nothing is an instance of a primitive type
_sp += nargs; // gen_instanceof might do an uncommon trap
query_value = gen_instanceof(obj, kls); query_value = gen_instanceof(obj, kls);
_sp -= nargs;
break; break;
case vmIntrinsics::_getModifiers: case vmIntrinsics::_getModifiers:
......
...@@ -494,6 +494,7 @@ class Parse : public GraphKit { ...@@ -494,6 +494,7 @@ class Parse : public GraphKit {
float dynamic_branch_prediction(float &cnt); float dynamic_branch_prediction(float &cnt);
float branch_prediction(float &cnt, BoolTest::mask btest, int target_bci); float branch_prediction(float &cnt, BoolTest::mask btest, int target_bci);
bool seems_never_taken(float prob); bool seems_never_taken(float prob);
bool seems_stable_comparison(BoolTest::mask btest, Node* c);
void do_ifnull(BoolTest::mask btest, Node* c); void do_ifnull(BoolTest::mask btest, Node* c);
void do_if(BoolTest::mask btest, Node* c); void do_if(BoolTest::mask btest, Node* c);
......
...@@ -892,6 +892,62 @@ bool Parse::seems_never_taken(float prob) { ...@@ -892,6 +892,62 @@ bool Parse::seems_never_taken(float prob) {
return prob < PROB_MIN; return prob < PROB_MIN;
} }
// True if the comparison seems to be the kind that will not change its
// statistics from true to false. See comments in adjust_map_after_if.
// This question is only asked along paths which are already
// classifed as untaken (by seems_never_taken), so really,
// if a path is never taken, its controlling comparison is
// already acting in a stable fashion. If the comparison
// seems stable, we will put an expensive uncommon trap
// on the untaken path. To be conservative, and to allow
// partially executed counted loops to be compiled fully,
// we will plant uncommon traps only after pointer comparisons.
bool Parse::seems_stable_comparison(BoolTest::mask btest, Node* cmp) {
for (int depth = 4; depth > 0; depth--) {
// The following switch can find CmpP here over half the time for
// dynamic language code rich with type tests.
// Code using counted loops or array manipulations (typical
// of benchmarks) will have many (>80%) CmpI instructions.
switch (cmp->Opcode()) {
case Op_CmpP:
// A never-taken null check looks like CmpP/BoolTest::eq.
// These certainly should be closed off as uncommon traps.
if (btest == BoolTest::eq)
return true;
// A never-failed type check looks like CmpP/BoolTest::ne.
// Let's put traps on those, too, so that we don't have to compile
// unused paths with indeterminate dynamic type information.
if (ProfileDynamicTypes)
return true;
return false;
case Op_CmpI:
// A small minority (< 10%) of CmpP are masked as CmpI,
// as if by boolean conversion ((p == q? 1: 0) != 0).
// Detect that here, even if it hasn't optimized away yet.
// Specifically, this covers the 'instanceof' operator.
if (btest == BoolTest::ne || btest == BoolTest::eq) {
if (_gvn.type(cmp->in(2))->singleton() &&
cmp->in(1)->is_Phi()) {
PhiNode* phi = cmp->in(1)->as_Phi();
int true_path = phi->is_diamond_phi();
if (true_path > 0 &&
_gvn.type(phi->in(1))->singleton() &&
_gvn.type(phi->in(2))->singleton()) {
// phi->region->if_proj->ifnode->bool->cmp
BoolNode* bol = phi->in(0)->in(1)->in(0)->in(1)->as_Bool();
btest = bol->_test._test;
cmp = bol->in(1);
continue;
}
}
}
return false;
}
}
return false;
}
//-------------------------------repush_if_args-------------------------------- //-------------------------------repush_if_args--------------------------------
// Push arguments of an "if" bytecode back onto the stack by adjusting _sp. // Push arguments of an "if" bytecode back onto the stack by adjusting _sp.
inline int Parse::repush_if_args() { inline int Parse::repush_if_args() {
...@@ -1137,19 +1193,22 @@ void Parse::adjust_map_after_if(BoolTest::mask btest, Node* c, float prob, ...@@ -1137,19 +1193,22 @@ void Parse::adjust_map_after_if(BoolTest::mask btest, Node* c, float prob,
bool is_fallthrough = (path == successor_for_bci(iter().next_bci())); bool is_fallthrough = (path == successor_for_bci(iter().next_bci()));
int cop = c->Opcode(); if (seems_never_taken(prob) && seems_stable_comparison(btest, c)) {
if (seems_never_taken(prob) && cop == Op_CmpP && btest == BoolTest::eq) {
// (An earlier version of do_if omitted '&& btest == BoolTest::eq'.)
//
// If this might possibly turn into an implicit null check, // If this might possibly turn into an implicit null check,
// and the null has never yet been seen, we need to generate // and the null has never yet been seen, we need to generate
// an uncommon trap, so as to recompile instead of suffering // an uncommon trap, so as to recompile instead of suffering
// with very slow branches. (We'll get the slow branches if // with very slow branches. (We'll get the slow branches if
// the program ever changes phase and starts seeing nulls here.) // the program ever changes phase and starts seeing nulls here.)
// //
// The tests we worry about are of the form (p == null). // We do not inspect for a null constant, since a node may
// We do not simply inspect for a null constant, since a node may
// optimize to 'null' later on. // optimize to 'null' later on.
//
// Null checks, and other tests which expect inequality,
// show btest == BoolTest::eq along the non-taken branch.
// On the other hand, type tests, must-be-null tests,
// and other tests which expect pointer equality,
// show btest == BoolTest::ne along the non-taken branch.
// We prune both types of branches if they look unused.
repush_if_args(); repush_if_args();
// We need to mark this branch as taken so that if we recompile we will // We need to mark this branch as taken so that if we recompile we will
// see that it is possible. In the tiered system the interpreter doesn't // see that it is possible. In the tiered system the interpreter doesn't
......
...@@ -119,7 +119,11 @@ void Parse::do_instanceof() { ...@@ -119,7 +119,11 @@ void Parse::do_instanceof() {
} }
// Push the bool result back on stack // Push the bool result back on stack
push( gen_instanceof( pop(), makecon(TypeKlassPtr::make(klass)) ) ); Node* res = gen_instanceof(peek(), makecon(TypeKlassPtr::make(klass)));
// Pop from stack AFTER gen_instanceof because it can uncommon trap.
pop();
push(res);
} }
//------------------------------array_store_check------------------------------ //------------------------------array_store_check------------------------------
......
...@@ -2476,6 +2476,9 @@ class CommandLineFlags { ...@@ -2476,6 +2476,9 @@ class CommandLineFlags {
develop(bool, MonomorphicArrayCheck, true, \ develop(bool, MonomorphicArrayCheck, true, \
"Uncommon-trap array store checks that require full type check") \ "Uncommon-trap array store checks that require full type check") \
\ \
diagnostic(bool, ProfileDynamicTypes, true, \
"do extra type profiling and use it more aggressively") \
\
develop(bool, DelayCompilationDuringStartup, true, \ develop(bool, DelayCompilationDuringStartup, true, \
"Delay invoking the compiler until main application class is " \ "Delay invoking the compiler until main application class is " \
"loaded") \ "loaded") \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册