提交 8f522e28 编写于 作者: Z zmajo

8068945: Use RBP register as proper frame pointer in JIT compiled code on x86

Summary: Introduce the PreserveFramePointer flag to control if RBP is used as the frame pointer or as a general purpose register.
Reviewed-by: kvn, roland, dlong, enevill, shade
上级 fae859ba
......@@ -314,26 +314,17 @@ public class X86Frame extends Frame {
//------------------------------------------------------------------------------
// frame::adjust_unextended_sp
private void adjustUnextendedSP() {
// If we are returning to a compiled MethodHandle call site, the
// saved_fp will in fact be a saved value of the unextended SP. The
// simplest way to tell whether we are returning to such a call site
// is as follows:
// On x86, sites calling method handle intrinsics and lambda forms are treated
// as any other call site. Therefore, no special action is needed when we are
// returning to any of these call sites.
CodeBlob cb = cb();
NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull();
if (senderNm != null) {
// If the sender PC is a deoptimization point, get the original
// PC. For MethodHandle call site the unextended_sp is stored in
// saved_fp.
if (senderNm.isDeoptMhEntry(getPC())) {
// DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP()));
raw_unextendedSP = getFP();
}
else if (senderNm.isDeoptEntry(getPC())) {
// DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp));
}
else if (senderNm.isMethodHandleReturn(getPC())) {
raw_unextendedSP = getFP();
// If the sender PC is a deoptimization point, get the original PC.
if (senderNm.isDeoptEntry(getPC()) ||
senderNm.isDeoptMhEntry(getPC())) {
// DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp));
}
}
}
......
......@@ -68,6 +68,8 @@ define_pd_global(bool, RewriteFrequentPairs, false);
define_pd_global(bool, UseMembar, true);
define_pd_global(bool, PreserveFramePointer, false);
// GC Ergo Flags
define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread
......
......@@ -55,6 +55,8 @@ define_pd_global(bool, RewriteFrequentPairs, true);
define_pd_global(bool, UseMembar, false);
define_pd_global(bool, PreserveFramePointer, false);
// GC Ergo Flags
define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread.
......
......@@ -74,6 +74,8 @@ define_pd_global(bool, RewriteFrequentPairs, true);
define_pd_global(bool, UseMembar, false);
define_pd_global(bool, PreserveFramePointer, false);
// GC Ergo Flags
define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // default max size of CMS young gen, per GC worker thread
......
......@@ -142,8 +142,10 @@ REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved
#endif // _LP64
// JSR 292 fixed register usages:
REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp);
// JSR 292
// On x86, the SP does not have to be saved when invoking method handle intrinsics
// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);
// Address is an abstraction used to represent a memory location
// using any of the amd64 addressing modes with one object.
......
......@@ -343,14 +343,13 @@ LIR_Opr FrameMap::stack_pointer() {
return FrameMap::rsp_opr;
}
// JSR 292
// On x86, there is no need to save the SP, because neither
// method handle intrinsics, nor compiled lambda forms modify it.
LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
assert(rbp == rbp_mh_SP_save, "must be same register");
return rbp_opr;
return LIR_OprFact::illegalOpr;
}
// Always returns true: no frame checks are performed here.
bool FrameMap::validate_frame() {
return true;
}
......@@ -360,6 +360,9 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by
generate_stack_overflow_check(bang_size_in_bytes);
push(rbp);
if (PreserveFramePointer) {
mov(rbp, rsp);
}
#ifdef TIERED
// c2 leaves fpu stack dirty. Clean it on entry
if (UseSSE < 2 ) {
......
......@@ -754,14 +754,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
// WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
// since we do a leave anyway.
// Pop the return address since we are possibly changing SP (restoring from BP).
// Pop the return address.
__ leave();
__ pop(rcx);
// Restore SP from BP if the exception PC is a method handle call site.
NOT_LP64(__ get_thread(thread);)
__ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
__ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
__ jmp(rcx); // jump to exception handler
break;
default: ShouldNotReachHere();
......@@ -832,11 +827,6 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) {
// the pop is also necessary to simulate the effect of a ret(0)
__ pop(exception_pc);
// Restore SP from BP if the exception PC is a method handle call site.
NOT_LP64(__ get_thread(thread);)
__ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
__ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
// continue at exception handler (return address removed)
// note: do *not* remove arguments when unwinding the
// activation since the caller assumes having
......
......@@ -224,7 +224,8 @@ bool frame::safe_for_sender(JavaThread *thread) {
if (sender_blob->is_nmethod()) {
nmethod* nm = sender_blob->as_nmethod_or_null();
if (nm != NULL) {
if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) {
if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) ||
nm->method()->is_method_handle_intrinsic()) {
return false;
}
}
......@@ -391,10 +392,9 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const {
// frame::verify_deopt_original_pc
//
// Verifies the calculated original PC of a deoptimization PC for the
// given unextended SP. The unextended SP might also be the saved SP
// for MethodHandle call sites.
// given unextended SP.
#ifdef ASSERT
void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) {
void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp) {
frame fr;
// This is ugly but it's better than to change {get,set}_original_pc
......@@ -404,33 +404,23 @@ void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool
address original_pc = nm->get_original_pc(&fr);
assert(nm->insts_contains(original_pc), "original PC must be in nmethod");
assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be");
}
#endif
//------------------------------------------------------------------------------
// frame::adjust_unextended_sp
void frame::adjust_unextended_sp() {
// If we are returning to a compiled MethodHandle call site, the
// saved_fp will in fact be a saved value of the unextended SP. The
// simplest way to tell whether we are returning to such a call site
// is as follows:
// On x86, sites calling method handle intrinsics and lambda forms are treated
// as any other call site. Therefore, no special action is needed when we are
// returning to any of these call sites.
nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null();
if (sender_nm != NULL) {
// If the sender PC is a deoptimization point, get the original
// PC. For MethodHandle call site the unextended_sp is stored in
// saved_fp.
if (sender_nm->is_deopt_mh_entry(_pc)) {
DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp));
_unextended_sp = _fp;
}
else if (sender_nm->is_deopt_entry(_pc)) {
// If the sender PC is a deoptimization point, get the original PC.
if (sender_nm->is_deopt_entry(_pc) ||
sender_nm->is_deopt_mh_entry(_pc)) {
DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
}
else if (sender_nm->is_method_handle_return(_pc)) {
_unextended_sp = _fp;
}
}
}
......
......@@ -76,11 +76,11 @@
// [locals and parameters ]
// <- sender sp
// [1] When the c++ interpreter calls a new method it returns to the frame
// [1] When the C++ interpreter calls a new method it returns to the frame
// manager which allocates a new frame on the stack. In that case there
// is no real callee of this newly allocated frame. The frame manager is
// aware of the additional frame(s) and will pop them as nested calls
// complete. Howevers tTo make it look good in the debugger the frame
// aware of the additional frame(s) and will pop them as nested calls
// complete. However, to make it look good in the debugger the frame
// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
// with a fake interpreter_state* parameter to make it easy to debug
// nested calls.
......@@ -88,7 +88,7 @@
// Note that contrary to the layout for the assembly interpreter the
// expression stack allocated for the C++ interpreter is full sized.
// However this is not as bad as it seems as the interpreter frame_manager
// will truncate the unused space on succesive method calls.
// will truncate the unused space on successive method calls.
//
// ------------------------------ C++ interpreter ----------------------------------------
......@@ -167,10 +167,7 @@
#ifdef ASSERT
// Used in frame::sender_for_{interpreter,compiled}_frame
static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false);
static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) {
verify_deopt_original_pc(nm, unextended_sp, true);
}
static void verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp);
#endif
public:
......
......@@ -94,7 +94,7 @@ inline frame::frame(intptr_t* sp, intptr_t* fp) {
// find_blob call. This is also why we can have no asserts on the validity
// of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
// -> pd_last_frame should use a specialized version of pd_last_frame which could
// call a specilaized frame constructor instead of this one.
// call a specialized frame constructor instead of this one.
// Then we could use the assert below. However this assert is of somewhat dubious
// value.
// assert(_pc != NULL, "no pc?");
......
......@@ -82,6 +82,8 @@ define_pd_global(size_t, CMSYoungGenPerWorker, 64*M); // default max size of CM
define_pd_global(uintx, TypeProfileLevel, 111);
define_pd_global(bool, PreserveFramePointer, false);
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
\
develop(bool, IEEEPrecision, true, \
......
......@@ -6090,6 +6090,10 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_
// We always push rbp, so that on return to the interpreter rbp will be
// restored correctly and we can correct the stack.
push(rbp);
// Save caller's stack pointer into RBP if the frame pointer is preserved.
if (PreserveFramePointer) {
mov(rbp, rsp);
}
// Remove word for ebp
framesize -= wordSize;
......@@ -6104,6 +6108,11 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_
// Save RBP register now.
framesize -= wordSize;
movptr(Address(rsp, framesize), rbp);
// Save caller's stack pointer into RBP if the frame pointer is preserved.
if (PreserveFramePointer) {
movptr(rbp, rsp);
addptr(rbp, framesize + wordSize);
}
}
if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
......
......@@ -374,7 +374,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
// member_reg - MemberName that was the trailing argument
// temp1_recv_klass - klass of stacked receiver, if needed
// rsi/r13 - interpreter linkage (if interpreted)
// rcx, rdx, rsi, rdi, r8, r8 - compiler arguments (if compiled)
// rcx, rdx, rsi, rdi, r8 - compiler arguments (if compiled)
Label L_incompatible_class_change_error;
switch (iid) {
......
......@@ -126,10 +126,6 @@ void OptoRuntime::generate_exception_blob() {
// rax: exception handler for given <exception oop/exception pc>
// Restore SP from BP if the exception PC is a MethodHandle call site.
__ cmpl(Address(rcx, JavaThread::is_method_handle_return_offset()), 0);
__ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
// We have a handler in rax, (could be deopt blob)
// rdx - throwing pc, deopt blob will need it.
......
......@@ -3393,8 +3393,8 @@ void OptoRuntime::generate_exception_blob() {
// Save callee-saved registers. See x86_64.ad.
// rbp is an implicitly saved callee saved register (i.e. the calling
// convention will save restore it in prolog/epilog) Other than that
// rbp is an implicitly saved callee saved register (i.e., the calling
// convention will save/restore it in the prolog/epilog). Other than that
// there are no callee save registers now that adapter frames are gone.
__ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
......@@ -3436,9 +3436,9 @@ void OptoRuntime::generate_exception_blob() {
// Restore callee-saved registers
// rbp is an implicitly saved callee saved register (i.e. the calling
// rbp is an implicitly saved callee-saved register (i.e., the calling
// convention will save/restore it in the prolog/epilog). Other than that
// there are no callee save registers no that adapter frames are gone.
// there are no callee save registers now that adapter frames are gone.
__ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
......@@ -3447,10 +3447,6 @@ void OptoRuntime::generate_exception_blob() {
// rax: exception handler
// Restore SP from BP if the exception PC is a MethodHandle call site.
__ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0);
__ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
// We have a handler in rax (could be deopt blob).
__ mov(r8, rax);
......
......@@ -930,21 +930,6 @@ static inline jdouble replicate8_imm(int con, int width) {
encode %{
enc_class preserve_SP %{
debug_only(int off0 = cbuf.insts_size());
MacroAssembler _masm(&cbuf);
// RBP is preserved across all calls, even compiled calls.
// Use it to preserve RSP in places where the callee might change the SP.
__ movptr(rbp_mh_SP_save, rsp);
debug_only(int off1 = cbuf.insts_size());
assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
%}
enc_class restore_SP %{
MacroAssembler _masm(&cbuf);
__ movptr(rsp, rbp_mh_SP_save);
%}
enc_class call_epilog %{
if (VerifyStackAtCalls) {
// Check that stack depth is unchanged: find majik cookie on stack
......
......@@ -123,50 +123,94 @@ alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();
// Class for all registers
reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
// Class for general registers
reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers which may be used for implicit null checks on win95
// Also safe for use by tailjump. We don't want to allocate in rbp,
reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);
// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);
// Class for general registers not including ECX
reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers not including EAX
// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
// Class for general registers not including EAX or EBX.
reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);
// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);
// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);
// Class of EDI (for synchronization)
reg_class edi_reg(EDI);
// Class of ESI (for synchronization)
reg_class esi_reg(ESI);
// Singleton class for interpreter's stack pointer
reg_class ebp_reg(EBP);
// Singleton class for stack pointer
reg_class sp_reg(ESP);
// Singleton class for instruction pointer
// reg_class ip_reg(EIP);
// Class of integer register pairs
reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
// Not AX or DX, used in divides
reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
......@@ -240,18 +284,11 @@ static int pre_call_resets_size() {
return size;
}
static int preserve_SP_size() {
return 2; // op, rm(reg/reg)
}
// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
int offset = 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
if (_method_handle_invoke)
offset += preserve_SP_size();
return offset;
return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}
int MachCallDynamicJavaNode::ret_addr_offset() {
......@@ -283,15 +320,6 @@ int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
return round_to(current_offset, alignment_required()) - current_offset;
}
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
current_offset += pre_call_resets_size(); // skip fldcw, if any
current_offset += preserve_SP_size(); // skip mov rbp, rsp
current_offset += 1; // skip call opcode byte
return round_to(current_offset, alignment_required()) - current_offset;
}
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
......@@ -523,6 +551,10 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
st->print("# stack bang (%d bytes)", bangsize);
st->print("\n\t");
st->print("PUSH EBP\t# Save EBP");
if (PreserveFramePointer) {
st->print("\n\t");
st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
}
if (framesize) {
st->print("\n\t");
st->print("SUB ESP, #%d\t# Create frame",framesize);
......@@ -532,6 +564,10 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
st->print("\n\t");
framesize -= wordSize;
st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
if (PreserveFramePointer) {
st->print("\n\t");
st->print("MOV EBP, [ESP + #%d]\t# Save the caller's SP into EBP", (framesize + wordSize));
}
}
if (VerifyStackAtCalls) {
......@@ -1489,7 +1525,7 @@ RegMask Matcher::modL_proj_mask() {
}
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
return EBP_REG_mask();
return NO_REG_mask();
}
// Returns true if the high 32 bits of the value is known to be zero.
......@@ -3735,7 +3771,7 @@ operand eRegP() %{
// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
constraint(ALLOC_IN_RC(int_reg_no_rbp));
constraint(ALLOC_IN_RC(int_reg_no_ebp));
match(RegP);
match(eAXRegP);
match(eBXRegP);
......@@ -3824,13 +3860,6 @@ operand eDIRegP(eRegP reg) %{
interface(REG_INTER);
%}
operand eBPRegP() %{
constraint(ALLOC_IN_RC(ebp_reg));
match(RegP);
format %{ "EBP" %}
interface(REG_INTER);
%}
operand eRegL() %{
constraint(ALLOC_IN_RC(long_reg));
match(RegL);
......@@ -12615,7 +12644,6 @@ instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst,
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
match(CallStaticJava);
predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
effect(USE meth);
ins_cost(300);
......@@ -12629,29 +12657,6 @@ instruct CallStaticJavaDirect(method meth) %{
ins_alignment(4);
%}
// Call Java Static Instruction (method handle version)
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
match(CallStaticJava);
predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
effect(USE meth);
// EBP is saved by all callees (for interpreter stack correction).
// We use it here for a similar purpose, in {preserve,restore}_SP.
ins_cost(300);
format %{ "CALL,static/MethodHandle " %}
opcode(0xE8); /* E8 cd */
ins_encode( pre_call_resets,
preserve_SP,
Java_Static_Call( meth ),
restore_SP,
call_epilog,
post_call_FPU );
ins_pipe( pipe_slow );
ins_alignment(4);
%}
// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
......
此差异已折叠。
......@@ -4083,7 +4083,7 @@ bool GraphBuilder::try_method_handle_inline(ciMethod* callee) {
ValueType* type = apop()->type();
if (type->is_constant()) {
ciMethod* target = type->as_ObjectType()->constant_value()->as_member_name()->get_vmtarget();
// If the target is another method handle invoke try recursivly to get
// If the target is another method handle invoke, try to recursively get
// a better target.
if (target->is_method_handle_intrinsic()) {
if (try_method_handle_inline(target)) {
......
......@@ -458,7 +458,7 @@ void LIR_OpRTCall::verify() const {
//-------------------visits--------------------------
// complete rework of LIR instruction visitor.
// The virtual calls for each instruction type is replaced by a big
// The virtual call for each instruction type is replaced by a big
// switch that adds the operands for each instruction
void LIR_OpVisitState::visit(LIR_Op* op) {
......@@ -825,7 +825,8 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
}
if (opJavaCall->_info) do_info(opJavaCall->_info);
if (opJavaCall->is_method_handle_invoke()) {
if (FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr &&
opJavaCall->is_method_handle_invoke()) {
opJavaCall->_method_handle_invoke_SP_save_opr = FrameMap::method_handle_invoke_SP_save_opr();
do_temp(opJavaCall->_method_handle_invoke_SP_save_opr);
}
......
......@@ -1219,10 +1219,8 @@ class LIR_OpJavaCall: public LIR_OpCall {
// JSR 292 support.
bool is_invokedynamic() const { return code() == lir_dynamic_call; }
bool is_method_handle_invoke() const {
return
method()->is_compiled_lambda_form() // Java-generated adapter
||
method()->is_method_handle_intrinsic(); // JVM-generated MH intrinsic
return method()->is_compiled_lambda_form() || // Java-generated lambda form
method()->is_method_handle_intrinsic(); // JVM-generated MH intrinsic
}
intptr_t vtable_offset() const {
......
......@@ -2875,7 +2875,7 @@ LIRItemList* LIRGenerator::invoke_visit_arguments(Invoke* x) {
// g) lock result registers and emit call operation
//
// Before issuing a call, we must spill-save all values on stack
// that are in caller-save register. "spill-save" moves thos registers
// that are in caller-save register. "spill-save" moves those registers
// either in a free callee-save register or spills them if no free
// callee save register is available.
//
......@@ -2883,7 +2883,7 @@ LIRItemList* LIRGenerator::invoke_visit_arguments(Invoke* x) {
// - if invoked between e) and f), we may lock callee save
// register in "spill-save" that destroys the receiver register
// before f) is executed
// - if we rearange the f) to be earlier, by loading %o0, it
// - if we rearrange f) to be earlier (by loading %o0) it
// may destroy a value on the stack that is currently in %o0
// and is waiting to be spilled
// - if we keep the receiver locked while doing spill-save,
......@@ -2916,14 +2916,16 @@ void LIRGenerator::do_Invoke(Invoke* x) {
assert(receiver->is_illegal() || receiver->is_equal(LIR_Assembler::receiverOpr()), "must match");
// JSR 292
// Preserve the SP over MethodHandle call sites.
// Preserve the SP over MethodHandle call sites, if needed.
ciMethod* target = x->target();
bool is_method_handle_invoke = (// %%% FIXME: Are both of these relevant?
target->is_method_handle_intrinsic() ||
target->is_compiled_lambda_form());
if (is_method_handle_invoke) {
info->set_is_method_handle_invoke(true);
__ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr());
if(FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) {
__ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr());
}
}
switch (x->code()) {
......@@ -2963,8 +2965,9 @@ void LIRGenerator::do_Invoke(Invoke* x) {
}
// JSR 292
// Restore the SP after MethodHandle call sites.
if (is_method_handle_invoke) {
// Restore the SP after MethodHandle call sites, if needed.
if (is_method_handle_invoke
&& FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) {
__ move(FrameMap::method_handle_invoke_SP_save_opr(), FrameMap::stack_pointer());
}
......
......@@ -631,11 +631,11 @@ InlineTree *InlineTree::build_inline_tree_for_callee( ciMethod* callee_method, J
}
int max_inline_level_adjust = 0;
if (caller_jvms->method() != NULL) {
if (caller_jvms->method()->is_compiled_lambda_form())
if (caller_jvms->method()->is_compiled_lambda_form()) {
max_inline_level_adjust += 1; // don't count actions in MH or indy adapter frames
else if (callee_method->is_method_handle_intrinsic() ||
callee_method->is_compiled_lambda_form()) {
max_inline_level_adjust += 1; // don't count method handle calls from java.lang.invoke implem
} else if (callee_method->is_method_handle_intrinsic() ||
callee_method->is_compiled_lambda_form()) {
max_inline_level_adjust += 1; // don't count method handle calls from java.lang.invoke implementation
}
if (max_inline_level_adjust != 0 && C->print_inlining() && (Verbose || WizardMode)) {
CompileTask::print_inline_indent(inline_level());
......
......@@ -171,8 +171,27 @@ static bool is_decipherable_compiled_frame(JavaThread* thread, frame* fr, nmetho
// Now do we have a useful PcDesc?
if (pc_desc == NULL ||
pc_desc->scope_decode_offset() == DebugInformationRecorder::serialized_null) {
// No debug information available for this pc
// vframeStream would explode if we try and walk the frames.
// No debug information is available for this PC.
//
// vframeStreamCommon::fill_from_frame() will decode the frame depending
// on the state of the thread.
//
// Case #1: If the thread is in Java (state == _thread_in_Java), then
// the vframeStreamCommon object will be filled as if the frame were a native
// compiled frame. Therefore, no debug information is needed.
//
// Case #2: If the thread is in any other state, then two steps will be performed:
// - if asserts are enabled, found_bad_method_frame() will be called and
// the assert in found_bad_method_frame() will be triggered;
// - if asserts are disabled, the vframeStreamCommon object will be filled
// as if it were a native compiled frame.
//
// Case #2 is similar to the way interpreter frames are processed in
// vframeStreamCommon::fill_from_interpreter_frame in case no valid BCI
// was found for an interpreted frame. If asserts are enabled, the assert
// in found_bad_method_frame() will be triggered. If asserts are disabled,
// the vframeStreamCommon object will be filled afterwards as if the
// interpreter were at the point of entering into the method.
return false;
}
......@@ -229,9 +248,10 @@ static bool is_decipherable_interpreted_frame(JavaThread* thread,
// a valid method. Then again we may have caught an interpreter
// frame in the middle of construction and the bci field is
// not yet valid.
*method_p = method;
if (!method->is_valid_method()) return false;
*method_p = method; // If the Method* found is invalid, it is
// ignored by forte_fill_call_trace_given_top().
// So set method_p only if the Method is valid.
address bcp = fr->interpreter_frame_bcp();
int bci = method->validate_bci_from_bcp(bcp);
......@@ -245,18 +265,33 @@ static bool is_decipherable_interpreted_frame(JavaThread* thread,
}
// Determine if 'fr' can be used to find an initial Java frame.
// Return false if it can not find a fully decipherable Java frame
// (in other words a frame that isn't safe to use in a vframe stream).
// Obviously if it can't even find a Java frame false will also be returned.
// Determine if a Java frame can be found starting with the frame 'fr'.
//
// Check the return value of find_initial_Java_frame and the value of
// 'method_p' to decide on how to use the results returned by this method.
//
// If 'method_p' is not NULL, an initial Java frame has been found and
// the stack can be walked starting from that initial frame. In this case,
// 'method_p' points to the Method that the initial frame belongs to and
// the initial Java frame is returned in initial_frame_p.
//
// find_initial_Java_frame() returns true if a Method has been found (i.e.,
// 'method_p' is not NULL) and the initial frame that belongs to that Method
// is decipherable.
//
// If we find a Java frame decipherable or not then by definition we have
// identified a method and that will be returned to the caller via method_p.
// If we can determine a bci that is returned also. (Hmm is it possible
// to return a method and bci and still return false? )
// A frame is considered to be decipherable:
//
// The initial Java frame we find (if any) is return via initial_frame_p.
// - if the frame is a compiled frame and a PCDesc is available;
//
// - if the frame is an interpreter frame that is valid or the thread is
// in state (_thread_in_native || state == _thread_in_vm || state == _thread_blocked).
//
// Note that find_initial_Java_frame() can return false even if an initial
// Java method was found (e.g., there is no PCDesc available for the method).
//
// If 'method_p' is NULL, it was not possible to find a Java frame when
// walking the stack starting from 'fr'. In this case find_initial_Java_frame
// returns false.
static bool find_initial_Java_frame(JavaThread* thread,
frame* fr,
......@@ -276,8 +311,6 @@ static bool find_initial_Java_frame(JavaThread* thread,
// recognizable to us. This should only happen if we are in a JRT_LEAF
// or something called by a JRT_LEAF method.
frame candidate = *fr;
// If the starting frame we were given has no codeBlob associated with
......@@ -332,9 +365,11 @@ static bool find_initial_Java_frame(JavaThread* thread,
nmethod* nm = (nmethod*) candidate.cb();
*method_p = nm->method();
// If the frame isn't fully decipherable then the default
// value for the bci is a signal that we don't have a bci.
// If we have a decipherable frame this bci value will
// If the frame is not decipherable, then the value of -1
// for the BCI is used to signal that no BCI is available.
// Furthermore, the method returns false in this case.
//
// If a decipherable frame is available, the BCI value will
// not be used.
*bci_p = -1;
......@@ -345,9 +380,9 @@ static bool find_initial_Java_frame(JavaThread* thread,
if (nm->is_native_method()) return true;
// If it isn't decipherable then we have found a pc that doesn't
// have a PCDesc that can get us a bci however we did find
// a method
// If the frame is not decipherable, then a PC was found
// that does not have a PCDesc from which a BCI can be obtained.
// Nevertheless, a Method was found.
if (!is_decipherable_compiled_frame(thread, &candidate, nm)) {
return false;
......@@ -356,7 +391,7 @@ static bool find_initial_Java_frame(JavaThread* thread,
// is_decipherable_compiled_frame may modify candidate's pc
*initial_frame_p = candidate;
assert(nm->pc_desc_at(candidate.pc()) != NULL, "if it's decipherable then pc must be valid");
assert(nm->pc_desc_at(candidate.pc()) != NULL, "debug information must be available if the frame is decipherable");
return true;
}
......@@ -386,17 +421,17 @@ static void forte_fill_call_trace_given_top(JavaThread* thd,
frame initial_Java_frame;
Method* method;
int bci;
int bci = -1; // assume BCI is not available for method
// update with correct information if available
int count;
count = 0;
assert(trace->frames != NULL, "trace->frames must be non-NULL");
bool fully_decipherable = find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci);
// The frame might not be walkable but still recovered a method
// (e.g. an nmethod with no scope info for the pc)
// Walk the stack starting from 'top_frame' and search for an initial Java frame.
find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci);
// Check if a Java Method has been found.
if (method == NULL) return;
if (!method->is_valid_method()) {
......@@ -404,29 +439,6 @@ static void forte_fill_call_trace_given_top(JavaThread* thd,
return;
}
// We got a Java frame however it isn't fully decipherable
// so it won't necessarily be safe to use it for the
// initial frame in the vframe stream.
if (!fully_decipherable) {
// Take whatever method the top-frame decoder managed to scrape up.
// We look further at the top frame only if non-safepoint
// debugging information is available.
count++;
trace->num_frames = count;
trace->frames[0].method_id = method->find_jmethod_id_or_null();
if (!method->is_native()) {
trace->frames[0].lineno = bci;
} else {
trace->frames[0].lineno = -3;
}
if (!initial_Java_frame.safe_for_sender(thd)) return;
RegisterMap map(thd, false);
initial_Java_frame = initial_Java_frame.sender(&map);
}
vframeStreamForte st(thd, initial_Java_frame, false);
for (; !st.at_end() && count < depth; st.forte_next(), count++) {
......
......@@ -3918,7 +3918,11 @@ class CommandLineFlags {
"Use locked-tracing when doing event-based tracing") \
\
diagnostic(bool, UseUnalignedAccesses, false, \
"Use unaligned memory accesses in sun.misc.Unsafe")
"Use unaligned memory accesses in sun.misc.Unsafe") \
\
product_pd(bool, PreserveFramePointer, \
"Use the FP register for holding the frame pointer " \
"and not as a general purpose register.")
/*
* Macros for factoring of globals
......
......@@ -1179,7 +1179,7 @@ methodHandle SharedRuntime::resolve_sub_helper(JavaThread *thread,
#endif
// JSR 292 key invariant:
// If the resolved method is a MethodHandle invoke target the call
// If the resolved method is a MethodHandle invoke target, the call
// site must be a MethodHandle call site, because the lambda form might tail-call
// leaving the stack in a state unknown to either caller or callee
// TODO detune for now but we might need it again
......
......@@ -389,12 +389,12 @@ inline void vframeStreamCommon::fill_from_compiled_frame(int decode_offset) {
decode_offset < 0 ||
decode_offset >= nm()->scopes_data_size()) {
// 6379830 AsyncGetCallTrace sometimes feeds us wild frames.
// If we attempt to read nmethod::scopes_data at serialized_null (== 0),
// or if we read some at other crazy offset,
// we will decode garbage and make wild references into the heap,
// leading to crashes in product mode.
// (This isn't airtight, of course, since there are internal
// offsets which are also crazy.)
// If we read nmethod::scopes_data at serialized_null (== 0)
// or if we read at some other invalid offset, invalid values will be decoded.
// Based on these values, invalid heap locations could be referenced
// that could lead to crashes in product mode.
// Therefore, do not use the decode offset if invalid, but fill the frame
// as if it were a native compiled frame (no Java-level assumptions).
#ifdef ASSERT
if (WizardMode) {
tty->print_cr("Error in fill_from_frame: pc_desc for "
......@@ -514,9 +514,15 @@ inline void vframeStreamCommon::fill_from_interpreter_frame() {
address bcp = _frame.interpreter_frame_bcp();
int bci = method->validate_bci_from_bcp(bcp);
// 6379830 AsyncGetCallTrace sometimes feeds us wild frames.
// AsyncGetCallTrace interrupts the VM asynchronously. As a result
// it is possible to access an interpreter frame for which
// no Java-level information is yet available (e.g., because
// the frame was being created when the VM interrupted it).
// In this scenario, pretend that the interpreter is at the point
// of entering the method.
if (bci < 0) {
found_bad_method_frame();
bci = 0; // pretend it's on the point of entering
bci = 0;
}
_mode = interpreted_mode;
_method = method;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册