From 8f522e2808f30d35df11fa8d6d63cdcc9e72bb48 Mon Sep 17 00:00:00 2001 From: zmajo Date: Mon, 27 Apr 2015 10:49:43 +0200 Subject: [PATCH] 8068945: Use RBP register as proper frame pointer in JIT compiled code on x86 Summary: Introduce the PreserveFramePointer flag to control if RBP is used as the frame pointer or as a general purpose register. Reviewed-by: kvn, roland, dlong, enevill, shade --- .../sun/jvm/hotspot/runtime/x86/X86Frame.java | 23 +- .../src/cpu/aarch64/vm/globals_aarch64.hpp | 2 + hotspot/src/cpu/ppc/vm/globals_ppc.hpp | 2 + hotspot/src/cpu/sparc/vm/globals_sparc.hpp | 2 + hotspot/src/cpu/x86/vm/assembler_x86.hpp | 6 +- hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp | 7 +- .../src/cpu/x86/vm/c1_MacroAssembler_x86.cpp | 3 + hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp | 12 +- hotspot/src/cpu/x86/vm/frame_x86.cpp | 30 +- hotspot/src/cpu/x86/vm/frame_x86.hpp | 13 +- hotspot/src/cpu/x86/vm/frame_x86.inline.hpp | 2 +- hotspot/src/cpu/x86/vm/globals_x86.hpp | 2 + hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp | 9 + hotspot/src/cpu/x86/vm/methodHandles_x86.cpp | 2 +- hotspot/src/cpu/x86/vm/runtime_x86_32.cpp | 4 - .../src/cpu/x86/vm/sharedRuntime_x86_64.cpp | 12 +- hotspot/src/cpu/x86/vm/x86.ad | 15 - hotspot/src/cpu/x86/vm/x86_32.ad | 133 ++--- hotspot/src/cpu/x86/vm/x86_64.ad | 474 +++++++++++------- hotspot/src/share/vm/c1/c1_GraphBuilder.cpp | 2 +- hotspot/src/share/vm/c1/c1_LIR.cpp | 5 +- hotspot/src/share/vm/c1/c1_LIR.hpp | 6 +- hotspot/src/share/vm/c1/c1_LIRGenerator.cpp | 15 +- hotspot/src/share/vm/opto/bytecodeInfo.cpp | 8 +- hotspot/src/share/vm/prims/forte.cpp | 112 +++-- hotspot/src/share/vm/runtime/globals.hpp | 6 +- .../src/share/vm/runtime/sharedRuntime.cpp | 2 +- hotspot/src/share/vm/runtime/vframe.hpp | 20 +- 28 files changed, 515 insertions(+), 414 deletions(-) diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java index 
0d6d532896..558aa4a8db 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java @@ -314,26 +314,17 @@ public class X86Frame extends Frame { //------------------------------------------------------------------------------ // frame::adjust_unextended_sp private void adjustUnextendedSP() { - // If we are returning to a compiled MethodHandle call site, the - // saved_fp will in fact be a saved value of the unextended SP. The - // simplest way to tell whether we are returning to such a call site - // is as follows: + // On x86, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. CodeBlob cb = cb(); NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); if (senderNm != null) { - // If the sender PC is a deoptimization point, get the original - // PC. For MethodHandle call site the unextended_sp is stored in - // saved_fp. - if (senderNm.isDeoptMhEntry(getPC())) { - // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP())); - raw_unextendedSP = getFP(); - } - else if (senderNm.isDeoptEntry(getPC())) { - // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp)); - } - else if (senderNm.isMethodHandleReturn(getPC())) { - raw_unextendedSP = getFP(); + // If the sender PC is a deoptimization point, get the original PC. 
+ if (senderNm.isDeoptEntry(getPC()) || + senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp)); } } } diff --git a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp index d0baef3988..c073735a7e 100644 --- a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp +++ b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp @@ -68,6 +68,8 @@ define_pd_global(bool, RewriteFrequentPairs, false); define_pd_global(bool, UseMembar, true); +define_pd_global(bool, PreserveFramePointer, false); + // GC Ergo Flags define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread diff --git a/hotspot/src/cpu/ppc/vm/globals_ppc.hpp b/hotspot/src/cpu/ppc/vm/globals_ppc.hpp index f2391d251e..705267859a 100644 --- a/hotspot/src/cpu/ppc/vm/globals_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/globals_ppc.hpp @@ -55,6 +55,8 @@ define_pd_global(bool, RewriteFrequentPairs, true); define_pd_global(bool, UseMembar, false); +define_pd_global(bool, PreserveFramePointer, false); + // GC Ergo Flags define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread. 
diff --git a/hotspot/src/cpu/sparc/vm/globals_sparc.hpp b/hotspot/src/cpu/sparc/vm/globals_sparc.hpp index 2873f441f9..800e6ef5a7 100644 --- a/hotspot/src/cpu/sparc/vm/globals_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/globals_sparc.hpp @@ -74,6 +74,8 @@ define_pd_global(bool, RewriteFrequentPairs, true); define_pd_global(bool, UseMembar, false); +define_pd_global(bool, PreserveFramePointer, false); + // GC Ergo Flags define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // default max size of CMS young gen, per GC worker thread diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp index b7ab95df48..742d6c5eed 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp @@ -142,8 +142,10 @@ REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved #endif // _LP64 -// JSR 292 fixed register usages: -REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp); +// JSR 292 +// On x86, the SP does not have to be saved when invoking method handle intrinsics +// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg. +REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg); // Address is an abstraction used to represent a memory location // using any of the amd64 addressing modes with one object. diff --git a/hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp b/hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp index f75eca7286..bec7eed65e 100644 --- a/hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp @@ -343,14 +343,13 @@ LIR_Opr FrameMap::stack_pointer() { return FrameMap::rsp_opr; } - // JSR 292 +// On x86, there is no need to save the SP, because neither +// method handle intrinsics, nor compiled lambda forms modify it. 
LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { - assert(rbp == rbp_mh_SP_save, "must be same register"); - return rbp_opr; + return LIR_OprFact::illegalOpr; } - bool FrameMap::validate_frame() { return true; } diff --git a/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp index 0c8d0767ed..cf1a59a30b 100644 --- a/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp @@ -360,6 +360,9 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by generate_stack_overflow_check(bang_size_in_bytes); push(rbp); + if (PreserveFramePointer) { + mov(rbp, rsp); + } #ifdef TIERED // c2 leaves fpu stack dirty. Clean it on entry if (UseSSE < 2 ) { diff --git a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp index fa7e8c3dd0..152ad2e018 100644 --- a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp @@ -754,14 +754,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP // since we do a leave anyway. - // Pop the return address since we are possibly changing SP (restoring from BP). + // Pop the return address. __ leave(); __ pop(rcx); - - // Restore SP from BP if the exception PC is a method handle call site. - NOT_LP64(__ get_thread(thread);) - __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0); - __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); __ jmp(rcx); // jump to exception handler break; default: ShouldNotReachHere(); @@ -832,11 +827,6 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) { // the pop is also necessary to simulate the effect of a ret(0) __ pop(exception_pc); - // Restore SP from BP if the exception PC is a method handle call site. 
- NOT_LP64(__ get_thread(thread);) - __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0); - __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); - // continue at exception handler (return address removed) // note: do *not* remove arguments when unwinding the // activation since the caller assumes having diff --git a/hotspot/src/cpu/x86/vm/frame_x86.cpp b/hotspot/src/cpu/x86/vm/frame_x86.cpp index 654f0689ec..525b13e684 100644 --- a/hotspot/src/cpu/x86/vm/frame_x86.cpp +++ b/hotspot/src/cpu/x86/vm/frame_x86.cpp @@ -224,7 +224,8 @@ bool frame::safe_for_sender(JavaThread *thread) { if (sender_blob->is_nmethod()) { nmethod* nm = sender_blob->as_nmethod_or_null(); if (nm != NULL) { - if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || + nm->method()->is_method_handle_intrinsic()) { return false; } } @@ -391,10 +392,9 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const { // frame::verify_deopt_original_pc // // Verifies the calculated original PC of a deoptimization PC for the -// given unextended SP. The unextended SP might also be the saved SP -// for MethodHandle call sites. +// given unextended SP. 
#ifdef ASSERT -void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { +void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp) { frame fr; // This is ugly but it's better than to change {get,set}_original_pc @@ -404,33 +404,23 @@ void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool address original_pc = nm->get_original_pc(&fr); assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); - assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); } #endif //------------------------------------------------------------------------------ // frame::adjust_unextended_sp void frame::adjust_unextended_sp() { - // If we are returning to a compiled MethodHandle call site, the - // saved_fp will in fact be a saved value of the unextended SP. The - // simplest way to tell whether we are returning to such a call site - // is as follows: + // On x86, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null(); if (sender_nm != NULL) { - // If the sender PC is a deoptimization point, get the original - // PC. For MethodHandle call site the unextended_sp is stored in - // saved_fp. - if (sender_nm->is_deopt_mh_entry(_pc)) { - DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp)); - _unextended_sp = _fp; - } - else if (sender_nm->is_deopt_entry(_pc)) { + // If the sender PC is a deoptimization point, get the original PC. 
+ if (sender_nm->is_deopt_entry(_pc) || + sender_nm->is_deopt_mh_entry(_pc)) { DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp)); } - else if (sender_nm->is_method_handle_return(_pc)) { - _unextended_sp = _fp; - } } } diff --git a/hotspot/src/cpu/x86/vm/frame_x86.hpp b/hotspot/src/cpu/x86/vm/frame_x86.hpp index 602b8ff186..3b7cee89f0 100644 --- a/hotspot/src/cpu/x86/vm/frame_x86.hpp +++ b/hotspot/src/cpu/x86/vm/frame_x86.hpp @@ -76,11 +76,11 @@ // [locals and parameters ] // <- sender sp -// [1] When the c++ interpreter calls a new method it returns to the frame +// [1] When the C++ interpreter calls a new method it returns to the frame // manager which allocates a new frame on the stack. In that case there // is no real callee of this newly allocated frame. The frame manager is -// aware of the additional frame(s) and will pop them as nested calls -// complete. Howevers tTo make it look good in the debugger the frame +// aware of the additional frame(s) and will pop them as nested calls +// complete. However, to make it look good in the debugger the frame // manager actually installs a dummy pc pointing to RecursiveInterpreterActivation // with a fake interpreter_state* parameter to make it easy to debug // nested calls. @@ -88,7 +88,7 @@ // Note that contrary to the layout for the assembly interpreter the // expression stack allocated for the C++ interpreter is full sized. // However this is not as bad as it seems as the interpreter frame_manager -// will truncate the unused space on succesive method calls. +// will truncate the unused space on successive method calls. 
// // ------------------------------ C++ interpreter ---------------------------------------- @@ -167,10 +167,7 @@ #ifdef ASSERT // Used in frame::sender_for_{interpreter,compiled}_frame - static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); - static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) { - verify_deopt_original_pc(nm, unextended_sp, true); - } + static void verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp); #endif public: diff --git a/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp b/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp index 0f1839692d..7f6a99ad2d 100644 --- a/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp +++ b/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp @@ -94,7 +94,7 @@ inline frame::frame(intptr_t* sp, intptr_t* fp) { // find_blob call. This is also why we can have no asserts on the validity // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler // -> pd_last_frame should use a specialized version of pd_last_frame which could - // call a specilaized frame constructor instead of this one. + // call a specialized frame constructor instead of this one. // Then we could use the assert below. However this assert is of somewhat dubious // value. 
// assert(_pc != NULL, "no pc?"); diff --git a/hotspot/src/cpu/x86/vm/globals_x86.hpp b/hotspot/src/cpu/x86/vm/globals_x86.hpp index a6d0fbbb33..61b168bfbb 100644 --- a/hotspot/src/cpu/x86/vm/globals_x86.hpp +++ b/hotspot/src/cpu/x86/vm/globals_x86.hpp @@ -82,6 +82,8 @@ define_pd_global(size_t, CMSYoungGenPerWorker, 64*M); // default max size of CM define_pd_global(uintx, TypeProfileLevel, 111); +define_pd_global(bool, PreserveFramePointer, false); + #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ \ develop(bool, IEEEPrecision, true, \ diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp index 9663bd7889..2b59daf59d 100644 --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp @@ -6090,6 +6090,10 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_ // We always push rbp, so that on return to interpreter rbp, will be // restored correctly and we can correct the stack. push(rbp); + // Save caller's stack pointer into RBP if the frame pointer is preserved. + if (PreserveFramePointer) { + mov(rbp, rsp); + } // Remove word for ebp framesize -= wordSize; @@ -6104,6 +6108,11 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_ // Save RBP register now. framesize -= wordSize; movptr(Address(rsp, framesize), rbp); + // Save caller's stack pointer into RBP if the frame pointer is preserved. 
+ if (PreserveFramePointer) { + movptr(rbp, rsp); + addptr(rbp, framesize); + } } if (VerifyStackAtCalls) { // Majik cookie to verify stack depth diff --git a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp index b3776ed8c3..65a2f3bf30 100644 --- a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp +++ b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp @@ -374,7 +374,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, // member_reg - MemberName that was the trailing argument // temp1_recv_klass - klass of stacked receiver, if needed // rsi/r13 - interpreter linkage (if interpreted) - // rcx, rdx, rsi, rdi, r8, r8 - compiler arguments (if compiled) + // rcx, rdx, rsi, rdi, r8 - compiler arguments (if compiled) Label L_incompatible_class_change_error; switch (iid) { diff --git a/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp b/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp index 1cc10d7662..36457cb817 100644 --- a/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp @@ -126,10 +126,6 @@ void OptoRuntime::generate_exception_blob() { // rax: exception handler for given - // Restore SP from BP if the exception PC is a MethodHandle call site. - __ cmpl(Address(rcx, JavaThread::is_method_handle_return_offset()), 0); - __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); - // We have a handler in rax, (could be deopt blob) // rdx - throwing pc, deopt blob will need it. diff --git a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp index afc137b69c..7984914255 100644 --- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp @@ -3393,8 +3393,8 @@ void OptoRuntime::generate_exception_blob() { // Save callee-saved registers. See x86_64.ad. - // rbp is an implicitly saved callee saved register (i.e. 
the calling - // convention will save restore it in prolog/epilog) Other than that + // rbp is an implicitly saved callee saved register (i.e., the calling + // convention will save/restore it in the prolog/epilog). Other than that // there are no callee save registers now that adapter frames are gone. __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); @@ -3436,9 +3436,9 @@ void OptoRuntime::generate_exception_blob() { // Restore callee-saved registers - // rbp is an implicitly saved callee saved register (i.e. the calling + // rbp is an implicitly saved callee-saved register (i.e., the calling // convention will save restore it in prolog/epilog) Other than that - // there are no callee save registers no that adapter frames are gone. + // there are no callee save registers now that adapter frames are gone. __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt)); @@ -3447,10 +3447,6 @@ void OptoRuntime::generate_exception_blob() { // rax: exception handler - // Restore SP from BP if the exception PC is a MethodHandle call site. - __ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0); - __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); - // We have a handler in rax (could be deopt blob). __ mov(r8, rax); diff --git a/hotspot/src/cpu/x86/vm/x86.ad b/hotspot/src/cpu/x86/vm/x86.ad index 68b42b840b..5c99066206 100644 --- a/hotspot/src/cpu/x86/vm/x86.ad +++ b/hotspot/src/cpu/x86/vm/x86.ad @@ -930,21 +930,6 @@ static inline jdouble replicate8_imm(int con, int width) { encode %{ - enc_class preserve_SP %{ - debug_only(int off0 = cbuf.insts_size()); - MacroAssembler _masm(&cbuf); - // RBP is preserved across all calls, even compiled calls. - // Use it to preserve RSP in places where the callee might change the SP. 
- __ movptr(rbp_mh_SP_save, rsp); - debug_only(int off1 = cbuf.insts_size()); - assert(off1 - off0 == preserve_SP_size(), "correct size prediction"); - %} - - enc_class restore_SP %{ - MacroAssembler _masm(&cbuf); - __ movptr(rsp, rbp_mh_SP_save); - %} - enc_class call_epilog %{ if (VerifyStackAtCalls) { // Check that stack depth is unchanged: find majik cookie on stack diff --git a/hotspot/src/cpu/x86/vm/x86_32.ad b/hotspot/src/cpu/x86/vm/x86_32.ad index e8b5438925..709c908c3b 100644 --- a/hotspot/src/cpu/x86/vm/x86_32.ad +++ b/hotspot/src/cpu/x86/vm/x86_32.ad @@ -123,50 +123,94 @@ alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP, // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) // +// Class for no registers (empty set). +reg_class no_reg(); + // Class for all registers -reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); +reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); +// Class for all registers (excluding EBP) +reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP); +// Dynamic register class that selects at runtime between register classes +// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer). +// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp; +reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %}); + // Class for general registers -reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX); -// Class for general registers which may be used for implicit null checks on win95 -// Also safe for use by tailjump. We don't want to allocate in rbp, -reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX); +reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX); +// Class for general registers (excluding EBP). +// This register class can be used for implicit null checks on win95. 
+// It is also safe for use by tailjumps (we don't want to allocate in ebp). +// Used also if the PreserveFramePointer flag is true. +reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX); +// Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp. +reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %}); + // Class of "X" registers reg_class int_x_reg(EBX, ECX, EDX, EAX); + // Class of registers that can appear in an address with no offset. // EBP and ESP require an extra instruction byte for zero offset. // Used in fast-unlock reg_class p_reg(EDX, EDI, ESI, EBX); -// Class for general registers not including ECX -reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX); -// Class for general registers not including EAX + +// Class for general registers excluding ECX +reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX); +// Class for general registers excluding ECX (and EBP) +reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX); +// Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp. +reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %}); + +// Class for general registers excluding EAX reg_class nax_reg(EDX, EDI, ESI, ECX, EBX); -// Class for general registers not including EAX or EBX. -reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP); + +// Class for general registers excluding EAX and EBX. +reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP); +// Class for general registers excluding EAX and EBX (and EBP) +reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX); +// Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp. 
+reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %}); + // Class of EAX (for multiply and divide operations) reg_class eax_reg(EAX); + // Class of EBX (for atomic add) reg_class ebx_reg(EBX); + // Class of ECX (for shift and JCXZ operations and cmpLTMask) reg_class ecx_reg(ECX); + // Class of EDX (for multiply and divide operations) reg_class edx_reg(EDX); + // Class of EDI (for synchronization) reg_class edi_reg(EDI); + // Class of ESI (for synchronization) reg_class esi_reg(ESI); -// Singleton class for interpreter's stack pointer -reg_class ebp_reg(EBP); + // Singleton class for stack pointer reg_class sp_reg(ESP); + // Singleton class for instruction pointer // reg_class ip_reg(EIP); + // Class of integer register pairs -reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI ); +reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI ); +// Class of integer register pairs (excluding EBP and EDI) +reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX ); +// Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp. +reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %}); + // Class of integer register pairs that aligns with calling convention reg_class eadx_reg( EAX,EDX ); reg_class ebcx_reg( ECX,EBX ); + // Not AX or DX, used in divides -reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP ); +reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP); +// Not AX, DX, or EBP; used in divides +reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI); +// Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp. +reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %}); // Floating point registers. Notice FPR0 is not a choice. // FPR0 is not ever allocated; we use clever encodings to fake @@ -240,18 +284,11 @@ static int pre_call_resets_size() { return size; } -static int preserve_SP_size() { - return 2; // op, rm(reg/reg) -} - // !!!!! 
Special hack to get all type of calls to specify the byte offset // from the start of the call to the point where the return address // will point. int MachCallStaticJavaNode::ret_addr_offset() { - int offset = 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points - if (_method_handle_invoke) - offset += preserve_SP_size(); - return offset; + return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points } int MachCallDynamicJavaNode::ret_addr_offset() { @@ -283,15 +320,6 @@ int CallStaticJavaDirectNode::compute_padding(int current_offset) const { return round_to(current_offset, alignment_required()) - current_offset; } -// The address of the call instruction needs to be 4-byte aligned to -// ensure that it does not span a cache line so that it can be patched. -int CallStaticJavaHandleNode::compute_padding(int current_offset) const { - current_offset += pre_call_resets_size(); // skip fldcw, if any - current_offset += preserve_SP_size(); // skip mov rbp, rsp - current_offset += 1; // skip call opcode byte - return round_to(current_offset, alignment_required()) - current_offset; -} - // The address of the call instruction needs to be 4-byte aligned to // ensure that it does not span a cache line so that it can be patched. 
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { @@ -523,6 +551,10 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { st->print("# stack bang (%d bytes)", bangsize); st->print("\n\t"); st->print("PUSH EBP\t# Save EBP"); + if (PreserveFramePointer) { + st->print("\n\t"); + st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); + } if (framesize) { st->print("\n\t"); st->print("SUB ESP, #%d\t# Create frame",framesize); @@ -532,6 +564,10 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { st->print("\n\t"); framesize -= wordSize; st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize); + if (PreserveFramePointer) { + st->print("\n\t"); + st->print("MOV EBP, [ESP + #%d]\t# Save the caller's SP into EBP", (framesize + wordSize)); + } } if (VerifyStackAtCalls) { @@ -1489,7 +1525,7 @@ RegMask Matcher::modL_proj_mask() { } const RegMask Matcher::method_handle_invoke_SP_save_mask() { - return EBP_REG_mask(); + return NO_REG_mask(); } // Returns true if the high 32 bits of the value is known to be zero. @@ -3735,7 +3771,7 @@ operand eRegP() %{ // On windows95, EBP is not safe to use for implicit null tests. operand eRegP_no_EBP() %{ - constraint(ALLOC_IN_RC(int_reg_no_rbp)); + constraint(ALLOC_IN_RC(int_reg_no_ebp)); match(RegP); match(eAXRegP); match(eBXRegP); @@ -3824,13 +3860,6 @@ operand eDIRegP(eRegP reg) %{ interface(REG_INTER); %} -operand eBPRegP() %{ - constraint(ALLOC_IN_RC(ebp_reg)); - match(RegP); - format %{ "EBP" %} - interface(REG_INTER); -%} - operand eRegL() %{ constraint(ALLOC_IN_RC(long_reg)); match(RegL); @@ -12615,7 +12644,6 @@ instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, // compute_padding() functions will have to be adjusted. instruct CallStaticJavaDirect(method meth) %{ match(CallStaticJava); - predicate(! 
((CallStaticJavaNode*)n)->is_method_handle_invoke()); effect(USE meth); ins_cost(300); @@ -12629,29 +12657,6 @@ instruct CallStaticJavaDirect(method meth) %{ ins_alignment(4); %} -// Call Java Static Instruction (method handle version) -// Note: If this code changes, the corresponding ret_addr_offset() and -// compute_padding() functions will have to be adjusted. -instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{ - match(CallStaticJava); - predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke()); - effect(USE meth); - // EBP is saved by all callees (for interpreter stack correction). - // We use it here for a similar purpose, in {preserve,restore}_SP. - - ins_cost(300); - format %{ "CALL,static/MethodHandle " %} - opcode(0xE8); /* E8 cd */ - ins_encode( pre_call_resets, - preserve_SP, - Java_Static_Call( meth ), - restore_SP, - call_epilog, - post_call_FPU ); - ins_pipe( pipe_slow ); - ins_alignment(4); -%} - // Call Java Dynamic Instruction // Note: If this code changes, the corresponding ret_addr_offset() and // compute_padding() functions will have to be adjusted. 
diff --git a/hotspot/src/cpu/x86/vm/x86_64.ad b/hotspot/src/cpu/x86/vm/x86_64.ad index b32ba52b78..9abe58f537 100644 --- a/hotspot/src/cpu/x86/vm/x86_64.ad +++ b/hotspot/src/cpu/x86/vm/x86_64.ad @@ -166,55 +166,67 @@ alloc_class chunk0(R10, R10_H, // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) // -// Class for all pointer registers (including RSP) -reg_class any_reg(RAX, RAX_H, - RDX, RDX_H, - RBP, RBP_H, - RDI, RDI_H, - RSI, RSI_H, - RCX, RCX_H, - RBX, RBX_H, - RSP, RSP_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R12, R12_H, - R13, R13_H, - R14, R14_H, - R15, R15_H); - -// Class for all pointer registers except RSP -reg_class ptr_reg(RAX, RAX_H, - RDX, RDX_H, - RBP, RBP_H, - RDI, RDI_H, - RSI, RSI_H, - RCX, RCX_H, - RBX, RBX_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R13, R13_H, - R14, R14_H); - -// Class for all pointer registers except RAX and RSP -reg_class ptr_no_rax_reg(RDX, RDX_H, - RBP, RBP_H, +// Empty register class. +reg_class no_reg(); + +// Class for all pointer registers (including RSP and RBP) +reg_class any_reg_with_rbp(RAX, RAX_H, + RDX, RDX_H, + RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + RSP, RSP_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R12, R12_H, + R13, R13_H, + R14, R14_H, + R15, R15_H); + +// Class for all pointer registers (including RSP, but excluding RBP) +reg_class any_reg_no_rbp(RAX, RAX_H, + RDX, RDX_H, RDI, RDI_H, RSI, RSI_H, RCX, RCX_H, RBX, RBX_H, + RSP, RSP_H, R8, R8_H, R9, R9_H, R10, R10_H, R11, R11_H, + R12, R12_H, R13, R13_H, - R14, R14_H); - -reg_class ptr_no_rbp_reg(RDX, RDX_H, - RAX, RAX_H, + R14, R14_H, + R15, R15_H); + +// Dynamic register class that selects at runtime between register classes +// any_reg_no_rbp and any_reg_with_rbp (depending on the value of the flag PreserveFramePointer). +// Equivalent to: return PreserveFramePointer ? 
any_reg_no_rbp : any_reg_with_rbp; +reg_class_dynamic any_reg(any_reg_no_rbp, any_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all pointer registers (excluding RSP) +reg_class ptr_reg_with_rbp(RAX, RAX_H, + RDX, RDX_H, + RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Class for all pointer registers (excluding RSP and RBP) +reg_class ptr_reg_no_rbp(RAX, RAX_H, + RDX, RDX_H, RDI, RDI_H, RSI, RSI_H, RCX, RCX_H, @@ -226,18 +238,66 @@ reg_class ptr_no_rbp_reg(RDX, RDX_H, R13, R13_H, R14, R14_H); -// Class for all pointer registers except RAX, RBX and RSP -reg_class ptr_no_rax_rbx_reg(RDX, RDX_H, - RBP, RBP_H, - RDI, RDI_H, - RSI, RSI_H, - RCX, RCX_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R13, R13_H, - R14, R14_H); +// Dynamic register class that selects between ptr_reg_no_rbp and ptr_reg_with_rbp. +reg_class_dynamic ptr_reg(ptr_reg_no_rbp, ptr_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all pointer registers (excluding RAX and RSP) +reg_class ptr_no_rax_reg_with_rbp(RDX, RDX_H, + RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Class for all pointer registers (excluding RAX, RSP, and RBP) +reg_class ptr_no_rax_reg_no_rbp(RDX, RDX_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Dynamic register class that selects between ptr_no_rax_reg_no_rbp and ptr_no_rax_reg_with_rbp. 
+reg_class_dynamic ptr_no_rax_reg(ptr_no_rax_reg_no_rbp, ptr_no_rax_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all pointer registers (excluding RAX, RBX, and RSP) +reg_class ptr_no_rax_rbx_reg_with_rbp(RDX, RDX_H, + RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Class for all pointer registers (excluding RAX, RBX, RSP, and RBP) +reg_class ptr_no_rax_rbx_reg_no_rbp(RDX, RDX_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Dynamic register class that selects between ptr_no_rax_rbx_reg_no_rbp and ptr_no_rax_rbx_reg_with_rbp. +reg_class_dynamic ptr_no_rax_rbx_reg(ptr_no_rax_rbx_reg_no_rbp, ptr_no_rax_rbx_reg_with_rbp, %{ PreserveFramePointer %}); // Singleton class for RAX pointer register reg_class ptr_rax_reg(RAX, RAX_H); @@ -251,59 +311,29 @@ reg_class ptr_rsi_reg(RSI, RSI_H); // Singleton class for RDI pointer register reg_class ptr_rdi_reg(RDI, RDI_H); -// Singleton class for RBP pointer register -reg_class ptr_rbp_reg(RBP, RBP_H); - // Singleton class for stack pointer reg_class ptr_rsp_reg(RSP, RSP_H); // Singleton class for TLS pointer reg_class ptr_r15_reg(R15, R15_H); -// Class for all long registers (except RSP) -reg_class long_reg(RAX, RAX_H, - RDX, RDX_H, - RBP, RBP_H, - RDI, RDI_H, - RSI, RSI_H, - RCX, RCX_H, - RBX, RBX_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R13, R13_H, - R14, R14_H); - -// Class for all long registers except RAX, RDX (and RSP) -reg_class long_no_rax_rdx_reg(RBP, RBP_H, - RDI, RDI_H, - RSI, RSI_H, - RCX, RCX_H, - RBX, RBX_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R13, R13_H, - R14, R14_H); - -// Class for all long registers except RCX (and RSP) -reg_class long_no_rcx_reg(RBP, RBP_H, - RDI, RDI_H, - RSI, RSI_H, - RAX, RAX_H, - RDX, RDX_H, - RBX, RBX_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R13, R13_H, 
- R14, R14_H); - -// Class for all long registers except RAX (and RSP) -reg_class long_no_rax_reg(RBP, RBP_H, +// Class for all long registers (excluding RSP) +reg_class long_reg_with_rbp(RAX, RAX_H, + RDX, RDX_H, + RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Class for all long registers (excluding RSP and RBP) +reg_class long_reg_no_rbp(RAX, RAX_H, RDX, RDX_H, RDI, RDI_H, RSI, RSI_H, @@ -316,6 +346,67 @@ reg_class long_no_rax_reg(RBP, RBP_H, R13, R13_H, R14, R14_H); +// Dynamic register class that selects between long_reg_no_rbp and long_reg_with_rbp. +reg_class_dynamic long_reg(long_reg_no_rbp, long_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all long registers (excluding RAX, RDX and RSP) +reg_class long_no_rax_rdx_reg_with_rbp(RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Class for all long registers (excluding RAX, RDX, RSP, and RBP) +reg_class long_no_rax_rdx_reg_no_rbp(RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Dynamic register class that selects between long_no_rax_rdx_reg_no_rbp and long_no_rax_rdx_reg_with_rbp. 
+reg_class_dynamic long_no_rax_rdx_reg(long_no_rax_rdx_reg_no_rbp, long_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all long registers (excluding RCX and RSP) +reg_class long_no_rcx_reg_with_rbp(RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RAX, RAX_H, + RDX, RDX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Class for all long registers (excluding RCX, RSP, and RBP) +reg_class long_no_rcx_reg_no_rbp(RDI, RDI_H, + RSI, RSI_H, + RAX, RAX_H, + RDX, RDX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Dynamic register class that selects between long_no_rcx_reg_no_rbp and long_no_rcx_reg_with_rbp. +reg_class_dynamic long_no_rcx_reg(long_no_rcx_reg_no_rbp, long_no_rcx_reg_with_rbp, %{ PreserveFramePointer %}); + // Singleton class for RAX long register reg_class long_rax_reg(RAX, RAX_H); @@ -325,27 +416,27 @@ reg_class long_rcx_reg(RCX, RCX_H); // Singleton class for RDX long register reg_class long_rdx_reg(RDX, RDX_H); -// Class for all int registers (except RSP) -reg_class int_reg(RAX, - RDX, - RBP, - RDI, - RSI, - RCX, - RBX, - R8, - R9, - R10, - R11, - R13, - R14); - -// Class for all int registers except RCX (and RSP) -reg_class int_no_rcx_reg(RAX, +// Class for all int registers (excluding RSP) +reg_class int_reg_with_rbp(RAX, + RDX, + RBP, + RDI, + RSI, + RCX, + RBX, + R8, + R9, + R10, + R11, + R13, + R14); + +// Class for all int registers (excluding RSP and RBP) +reg_class int_reg_no_rbp(RAX, RDX, - RBP, RDI, RSI, + RCX, RBX, R8, R9, @@ -354,18 +445,66 @@ reg_class int_no_rcx_reg(RAX, R13, R14); -// Class for all int registers except RAX, RDX (and RSP) -reg_class int_no_rax_rdx_reg(RBP, - RDI, - RSI, - RCX, - RBX, - R8, - R9, - R10, - R11, - R13, - R14); +// Dynamic register class that selects between int_reg_no_rbp and int_reg_with_rbp. 
+reg_class_dynamic int_reg(int_reg_no_rbp, int_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all int registers (excluding RCX and RSP) +reg_class int_no_rcx_reg_with_rbp(RAX, + RDX, + RBP, + RDI, + RSI, + RBX, + R8, + R9, + R10, + R11, + R13, + R14); + +// Class for all int registers (excluding RCX, RSP, and RBP) +reg_class int_no_rcx_reg_no_rbp(RAX, + RDX, + RDI, + RSI, + RBX, + R8, + R9, + R10, + R11, + R13, + R14); + +// Dynamic register class that selects between int_no_rcx_reg_no_rbp and int_no_rcx_reg_with_rbp. +reg_class_dynamic int_no_rcx_reg(int_no_rcx_reg_no_rbp, int_no_rcx_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all int registers (excluding RAX, RDX, and RSP) +reg_class int_no_rax_rdx_reg_with_rbp(RBP, + RDI, + RSI, + RCX, + RBX, + R8, + R9, + R10, + R11, + R13, + R14); + +// Class for all int registers (excluding RAX, RDX, RSP, and RBP) +reg_class int_no_rax_rdx_reg_no_rbp(RDI, + RSI, + RCX, + RBX, + R8, + R9, + R10, + R11, + R13, + R14); + +// Dynamic register class that selects between int_no_rax_rdx_reg_no_rbp and int_no_rax_rdx_reg_with_rbp. +reg_class_dynamic int_no_rax_rdx_reg(int_no_rax_rdx_reg_no_rbp, int_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %}); // Singleton class for RAX int register reg_class int_rax_reg(RAX); @@ -396,9 +535,6 @@ source %{ #define __ _masm. -static int preserve_SP_size() { - return 3; // rex.w, op, rm(reg/reg) -} static int clear_avx_size() { return (Compile::current()->max_vector_size() > 16) ? 
3 : 0; // vzeroupper } @@ -409,9 +545,7 @@ static int clear_avx_size() { int MachCallStaticJavaNode::ret_addr_offset() { int offset = 5; // 5 bytes from start of call to where return address points - offset += clear_avx_size(); - if (_method_handle_invoke) - offset += preserve_SP_size(); + offset += clear_avx_size(); return offset; } @@ -448,16 +582,6 @@ int CallStaticJavaDirectNode::compute_padding(int current_offset) const return round_to(current_offset, alignment_required()) - current_offset; } -// The address of the call instruction needs to be 4-byte aligned to -// ensure that it does not span a cache line so that it can be patched. -int CallStaticJavaHandleNode::compute_padding(int current_offset) const -{ - current_offset += preserve_SP_size(); // skip mov rbp, rsp - current_offset += clear_avx_size(); // skip vzeroupper - current_offset += 1; // skip call opcode byte - return round_to(current_offset, alignment_required()) - current_offset; -} - // The address of the call instruction needs to be 4-byte aligned to // ensure that it does not span a cache line so that it can be patched. 
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const @@ -724,6 +848,10 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { st->print("# stack bang (%d bytes)", bangsize); st->print("\n\t"); st->print("pushq rbp\t# Save rbp"); + if (PreserveFramePointer) { + st->print("\n\t"); + st->print("movq rbp, rsp\t# Save the caller's SP into rbp"); + } if (framesize) { st->print("\n\t"); st->print("subq rsp, #%d\t# Create frame",framesize); @@ -732,7 +860,11 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { st->print("subq rsp, #%d\t# Create frame",framesize); st->print("\n\t"); framesize -= wordSize; - st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize); + st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize); + if (PreserveFramePointer) { + st->print("\n\t"); + st->print("movq rbp, [rsp + #%d]\t# Save the caller's SP into rbp", (framesize + wordSize)); + } } if (VerifyStackAtCalls) { @@ -1598,8 +1730,9 @@ RegMask Matcher::modL_proj_mask() { return LONG_RDX_REG_mask(); } +// Register for saving SP into on method handle invokes. Not used on x86_64. const RegMask Matcher::method_handle_invoke_SP_save_mask() { - return PTR_RBP_REG_mask(); + return NO_REG_mask(); } %} @@ -3202,7 +3335,7 @@ operand no_rax_rdx_RegI() // Pointer Register operand any_RegP() %{ - constraint(ALLOC_IN_RC(any_reg)); + constraint(ALLOC_IN_RC(any_reg)); match(RegP); match(rax_RegP); match(rbx_RegP); @@ -3224,8 +3357,8 @@ operand rRegP() match(rbx_RegP); match(rdi_RegP); match(rsi_RegP); - match(rbp_RegP); - match(r15_RegP); // See Q&A below about r15_RegP. + match(rbp_RegP); // See Q&A below about + match(r15_RegP); // r15_RegP and rbp_RegP. format %{ %} interface(REG_INTER); @@ -3241,11 +3374,14 @@ operand rRegN() %{ // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP? // Answer: Operand match rules govern the DFA as it processes instruction inputs. 
-// It's fine for an instruction input which expects rRegP to match a r15_RegP. +// It's fine for an instruction input that expects rRegP to match a r15_RegP. // The output of an instruction is controlled by the allocator, which respects // register class masks, not match rules. Unless an instruction mentions // r15_RegP or any_RegP explicitly as its output, r15 will not be considered // by the allocator as an input. +// The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true, +// the RBP is used as a proper frame pointer and is not included in ptr_reg. As a +// result, RBP is not included in the output of the instruction either. operand no_rax_RegP() %{ @@ -3259,9 +3395,11 @@ operand no_rax_RegP() interface(REG_INTER); %} +// This operand is not allowed to use RBP even if +// RBP is not used to hold the frame pointer. operand no_rbp_RegP() %{ - constraint(ALLOC_IN_RC(ptr_no_rbp_reg)); + constraint(ALLOC_IN_RC(ptr_reg_no_rbp)); match(RegP); match(rbx_RegP); match(rsi_RegP); @@ -3338,16 +3476,6 @@ operand rdi_RegP() interface(REG_INTER); %} -operand rbp_RegP() -%{ - constraint(ALLOC_IN_RC(ptr_rbp_reg)); - match(RegP); - match(rRegP); - - format %{ %} - interface(REG_INTER); -%} - operand r15_RegP() %{ constraint(ALLOC_IN_RC(ptr_r15_reg)); @@ -11410,7 +11538,6 @@ instruct safePoint_poll_far(rFlagsReg cr, rRegP poll) // compute_padding() functions will have to be adjusted. instruct CallStaticJavaDirect(method meth) %{ match(CallStaticJava); - predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke()); effect(USE meth); ins_cost(300); @@ -11421,27 +11548,6 @@ instruct CallStaticJavaDirect(method meth) %{ ins_alignment(4); %} -// Call Java Static Instruction (method handle version) -// Note: If this code changes, the corresponding ret_addr_offset() and -// compute_padding() functions will have to be adjusted. 
-instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{ - match(CallStaticJava); - predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke()); - effect(USE meth); - // RBP is saved by all callees (for interpreter stack correction). - // We use it here for a similar purpose, in {preserve,restore}_SP. - - ins_cost(300); - format %{ "call,static/MethodHandle " %} - opcode(0xE8); /* E8 cd */ - ins_encode(clear_avx, preserve_SP, - Java_Static_Call(meth), - restore_SP, - call_epilog); - ins_pipe(pipe_slow); - ins_alignment(4); -%} - // Call Java Dynamic Instruction // Note: If this code changes, the corresponding ret_addr_offset() and // compute_padding() functions will have to be adjusted. diff --git a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp index 8960feb222..9a7c291b88 100644 --- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp +++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp @@ -4083,7 +4083,7 @@ bool GraphBuilder::try_method_handle_inline(ciMethod* callee) { ValueType* type = apop()->type(); if (type->is_constant()) { ciMethod* target = type->as_ObjectType()->constant_value()->as_member_name()->get_vmtarget(); - // If the target is another method handle invoke try recursivly to get + // If the target is another method handle invoke, try to recursively get // a better target. if (target->is_method_handle_intrinsic()) { if (try_method_handle_inline(target)) { diff --git a/hotspot/src/share/vm/c1/c1_LIR.cpp b/hotspot/src/share/vm/c1/c1_LIR.cpp index d58e3c85b4..7d2b4f3e88 100644 --- a/hotspot/src/share/vm/c1/c1_LIR.cpp +++ b/hotspot/src/share/vm/c1/c1_LIR.cpp @@ -458,7 +458,7 @@ void LIR_OpRTCall::verify() const { //-------------------visits-------------------------- // complete rework of LIR instruction visitor. 
-// The virtual calls for each instruction type is replaced by a big +// The virtual call for each instruction type is replaced by a big // switch that adds the operands for each instruction void LIR_OpVisitState::visit(LIR_Op* op) { @@ -825,7 +825,8 @@ void LIR_OpVisitState::visit(LIR_Op* op) { } if (opJavaCall->_info) do_info(opJavaCall->_info); - if (opJavaCall->is_method_handle_invoke()) { + if (FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr && + opJavaCall->is_method_handle_invoke()) { opJavaCall->_method_handle_invoke_SP_save_opr = FrameMap::method_handle_invoke_SP_save_opr(); do_temp(opJavaCall->_method_handle_invoke_SP_save_opr); } diff --git a/hotspot/src/share/vm/c1/c1_LIR.hpp b/hotspot/src/share/vm/c1/c1_LIR.hpp index 90a47c8b47..4affbfb082 100644 --- a/hotspot/src/share/vm/c1/c1_LIR.hpp +++ b/hotspot/src/share/vm/c1/c1_LIR.hpp @@ -1219,10 +1219,8 @@ class LIR_OpJavaCall: public LIR_OpCall { // JSR 292 support. bool is_invokedynamic() const { return code() == lir_dynamic_call; } bool is_method_handle_invoke() const { - return - method()->is_compiled_lambda_form() // Java-generated adapter - || - method()->is_method_handle_intrinsic(); // JVM-generated MH intrinsic + return method()->is_compiled_lambda_form() || // Java-generated lambda form + method()->is_method_handle_intrinsic(); // JVM-generated MH intrinsic } intptr_t vtable_offset() const { diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp index 5c8ddd01f8..cc35fc3ae3 100644 --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp @@ -2875,7 +2875,7 @@ LIRItemList* LIRGenerator::invoke_visit_arguments(Invoke* x) { // g) lock result registers and emit call operation // // Before issuing a call, we must spill-save all values on stack -// that are in caller-save register. "spill-save" moves thos registers +// that are in caller-save register. 
"spill-save" moves those registers // either in a free callee-save register or spills them if no free // callee save register is available. // @@ -2883,7 +2883,7 @@ LIRItemList* LIRGenerator::invoke_visit_arguments(Invoke* x) { // - if invoked between e) and f), we may lock callee save // register in "spill-save" that destroys the receiver register // before f) is executed -// - if we rearange the f) to be earlier, by loading %o0, it +// - if we rearrange f) to be earlier (by loading %o0) it // may destroy a value on the stack that is currently in %o0 // and is waiting to be spilled // - if we keep the receiver locked while doing spill-save, @@ -2916,14 +2916,16 @@ void LIRGenerator::do_Invoke(Invoke* x) { assert(receiver->is_illegal() || receiver->is_equal(LIR_Assembler::receiverOpr()), "must match"); // JSR 292 - // Preserve the SP over MethodHandle call sites. + // Preserve the SP over MethodHandle call sites, if needed. ciMethod* target = x->target(); bool is_method_handle_invoke = (// %%% FIXME: Are both of these relevant? target->is_method_handle_intrinsic() || target->is_compiled_lambda_form()); if (is_method_handle_invoke) { info->set_is_method_handle_invoke(true); - __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr()); + if(FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) { + __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr()); + } } switch (x->code()) { @@ -2963,8 +2965,9 @@ void LIRGenerator::do_Invoke(Invoke* x) { } // JSR 292 - // Restore the SP after MethodHandle call sites. - if (is_method_handle_invoke) { + // Restore the SP after MethodHandle call sites, if needed. 
+ if (is_method_handle_invoke + && FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) { __ move(FrameMap::method_handle_invoke_SP_save_opr(), FrameMap::stack_pointer()); } diff --git a/hotspot/src/share/vm/opto/bytecodeInfo.cpp b/hotspot/src/share/vm/opto/bytecodeInfo.cpp index 757b971766..fa476d6240 100644 --- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp +++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp @@ -631,11 +631,11 @@ InlineTree *InlineTree::build_inline_tree_for_callee( ciMethod* callee_method, J } int max_inline_level_adjust = 0; if (caller_jvms->method() != NULL) { - if (caller_jvms->method()->is_compiled_lambda_form()) + if (caller_jvms->method()->is_compiled_lambda_form()) { max_inline_level_adjust += 1; // don't count actions in MH or indy adapter frames - else if (callee_method->is_method_handle_intrinsic() || - callee_method->is_compiled_lambda_form()) { - max_inline_level_adjust += 1; // don't count method handle calls from java.lang.invoke implem + } else if (callee_method->is_method_handle_intrinsic() || + callee_method->is_compiled_lambda_form()) { + max_inline_level_adjust += 1; // don't count method handle calls from java.lang.invoke implementation } if (max_inline_level_adjust != 0 && C->print_inlining() && (Verbose || WizardMode)) { CompileTask::print_inline_indent(inline_level()); diff --git a/hotspot/src/share/vm/prims/forte.cpp b/hotspot/src/share/vm/prims/forte.cpp index 2ae0e1bba9..19d715ce36 100644 --- a/hotspot/src/share/vm/prims/forte.cpp +++ b/hotspot/src/share/vm/prims/forte.cpp @@ -171,8 +171,27 @@ static bool is_decipherable_compiled_frame(JavaThread* thread, frame* fr, nmetho // Now do we have a useful PcDesc? if (pc_desc == NULL || pc_desc->scope_decode_offset() == DebugInformationRecorder::serialized_null) { - // No debug information available for this pc - // vframeStream would explode if we try and walk the frames. + // No debug information is available for this PC. 
+ // + // vframeStreamCommon::fill_from_frame() will decode the frame depending + // on the state of the thread. + // + // Case #1: If the thread is in Java (state == _thread_in_Java), then + // the vframeStreamCommon object will be filled as if the frame were a native + // compiled frame. Therefore, no debug information is needed. + // + // Case #2: If the thread is in any other state, then two steps will be performed: + // - if asserts are enabled, found_bad_method_frame() will be called and + // the assert in found_bad_method_frame() will be triggered; + // - if asserts are disabled, the vframeStreamCommon object will be filled + // as if it were a native compiled frame. + // + // Case #2 is similar to the way interpreter frames are processed in + // vframeStreamCommon::fill_from_interpreter_frame in case no valid BCI + // was found for an interpreted frame. If asserts are enabled, the assert + // in found_bad_method_frame() will be triggered. If asserts are disabled, + // the vframeStreamCommon object will be filled afterwards as if the + // interpreter were at the point of entering into the method. return false; } @@ -229,9 +248,10 @@ static bool is_decipherable_interpreted_frame(JavaThread* thread, // a valid method. Then again we may have caught an interpreter // frame in the middle of construction and the bci field is // not yet valid. - - *method_p = method; if (!method->is_valid_method()) return false; + *method_p = method; // If the Method* found is invalid, it is + // ignored by forte_fill_call_trace_given_top(). + // So set method_p only if the Method is valid. address bcp = fr->interpreter_frame_bcp(); int bci = method->validate_bci_from_bcp(bcp); @@ -245,18 +265,33 @@ static bool is_decipherable_interpreted_frame(JavaThread* thread, } -// Determine if 'fr' can be used to find an initial Java frame. -// Return false if it can not find a fully decipherable Java frame -// (in other words a frame that isn't safe to use in a vframe stream). 
-// Obviously if it can't even find a Java frame false will also be returned. +// Determine if a Java frame can be found starting with the frame 'fr'. +// +// Check the return value of find_initial_Java_frame and the value of +// 'method_p' to decide how to use the results returned by this method. +// +// If 'method_p' is not NULL, an initial Java frame has been found and +// the stack can be walked starting from that initial frame. In this case, +// 'method_p' points to the Method that the initial frame belongs to and +// the initial Java frame is returned in initial_frame_p. +// +// find_initial_Java_frame() returns true if a Method has been found (i.e., +// 'method_p' is not NULL) and the initial frame that belongs to that Method +// is decipherable. // -// If we find a Java frame decipherable or not then by definition we have -// identified a method and that will be returned to the caller via method_p. -// If we can determine a bci that is returned also. (Hmm is it possible -// to return a method and bci and still return false? ) +// A frame is considered to be decipherable: // -// The initial Java frame we find (if any) is return via initial_frame_p. +// - if the frame is a compiled frame and a PCDesc is available; // +// - if the frame is an interpreter frame that is valid or the thread is in +// state (_thread_in_native || state == _thread_in_vm || state == _thread_blocked). +// +// Note that find_initial_Java_frame() can return false even if an initial +// Java method was found (e.g., there is no PCDesc available for the method). +// +// If 'method_p' is NULL, it was not possible to find a Java frame when +// walking the stack starting from 'fr'. In this case find_initial_Java_frame +// returns false. static bool find_initial_Java_frame(JavaThread* thread, frame* fr, @@ -276,8 +311,6 @@ static bool find_initial_Java_frame(JavaThread* thread, // recognizable to us. This should only happen if we are in a JRT_LEAF // or something called by a JRT_LEAF method. 
- - frame candidate = *fr; // If the starting frame we were given has no codeBlob associated with @@ -332,9 +365,11 @@ static bool find_initial_Java_frame(JavaThread* thread, nmethod* nm = (nmethod*) candidate.cb(); *method_p = nm->method(); - // If the frame isn't fully decipherable then the default - // value for the bci is a signal that we don't have a bci. - // If we have a decipherable frame this bci value will + // If the frame is not decipherable, then the value of -1 + // for the BCI is used to signal that no BCI is available. + // Furthermore, the method returns false in this case. + // + // If a decipherable frame is available, the BCI value will // not be used. *bci_p = -1; @@ -345,9 +380,9 @@ static bool find_initial_Java_frame(JavaThread* thread, if (nm->is_native_method()) return true; - // If it isn't decipherable then we have found a pc that doesn't - // have a PCDesc that can get us a bci however we did find - // a method + // If the frame is not decipherable, then a PC was found + // that does not have a PCDesc from which a BCI can be obtained. + // Nevertheless, a Method was found. 
if (!is_decipherable_compiled_frame(thread, &candidate, nm)) { return false; @@ -356,7 +391,7 @@ static bool find_initial_Java_frame(JavaThread* thread, // is_decipherable_compiled_frame may modify candidate's pc *initial_frame_p = candidate; - assert(nm->pc_desc_at(candidate.pc()) != NULL, "if it's decipherable then pc must be valid"); + assert(nm->pc_desc_at(candidate.pc()) != NULL, "debug information must be available if the frame is decipherable"); return true; } @@ -386,17 +421,17 @@ static void forte_fill_call_trace_given_top(JavaThread* thd, frame initial_Java_frame; Method* method; - int bci; + int bci = -1; // assume BCI is not available for method + // update with correct information if available int count; count = 0; assert(trace->frames != NULL, "trace->frames must be non-NULL"); - bool fully_decipherable = find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci); - - // The frame might not be walkable but still recovered a method - // (e.g. an nmethod with no scope info for the pc) + // Walk the stack starting from 'top_frame' and search for an initial Java frame. + find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci); + // Check if a Java Method has been found. if (method == NULL) return; if (!method->is_valid_method()) { @@ -404,29 +439,6 @@ static void forte_fill_call_trace_given_top(JavaThread* thd, return; } - // We got a Java frame however it isn't fully decipherable - // so it won't necessarily be safe to use it for the - // initial frame in the vframe stream. - - if (!fully_decipherable) { - // Take whatever method the top-frame decoder managed to scrape up. - // We look further at the top frame only if non-safepoint - // debugging information is available. 
- count++; - trace->num_frames = count; - trace->frames[0].method_id = method->find_jmethod_id_or_null(); - if (!method->is_native()) { - trace->frames[0].lineno = bci; - } else { - trace->frames[0].lineno = -3; - } - - if (!initial_Java_frame.safe_for_sender(thd)) return; - - RegisterMap map(thd, false); - initial_Java_frame = initial_Java_frame.sender(&map); - } - vframeStreamForte st(thd, initial_Java_frame, false); for (; !st.at_end() && count < depth; st.forte_next(), count++) { diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index c4815a992f..9f929e9933 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -3918,7 +3918,11 @@ class CommandLineFlags { "Use locked-tracing when doing event-based tracing") \ \ diagnostic(bool, UseUnalignedAccesses, false, \ - "Use unaligned memory accesses in sun.misc.Unsafe") + "Use unaligned memory accesses in sun.misc.Unsafe") \ + \ + product_pd(bool, PreserveFramePointer, \ + "Use the FP register for holding the frame pointer " \ + "and not as a general purpose register.") /* * Macros for factoring of globals diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.cpp b/hotspot/src/share/vm/runtime/sharedRuntime.cpp index 04a23aa23c..5446391648 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp +++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp @@ -1179,7 +1179,7 @@ methodHandle SharedRuntime::resolve_sub_helper(JavaThread *thread, #endif // JSR 292 key invariant: - // If the resolved method is a MethodHandle invoke target the call + // If the resolved method is a MethodHandle invoke target, the call // site must be a MethodHandle call site, because the lambda form might tail-call // leaving the stack in a state unknown to either caller or callee // TODO detune for now but we might need it again diff --git a/hotspot/src/share/vm/runtime/vframe.hpp b/hotspot/src/share/vm/runtime/vframe.hpp index 17ead61b78..badd129455 
100644 --- a/hotspot/src/share/vm/runtime/vframe.hpp +++ b/hotspot/src/share/vm/runtime/vframe.hpp @@ -389,12 +389,12 @@ inline void vframeStreamCommon::fill_from_compiled_frame(int decode_offset) { decode_offset < 0 || decode_offset >= nm()->scopes_data_size()) { // 6379830 AsyncGetCallTrace sometimes feeds us wild frames. - // If we attempt to read nmethod::scopes_data at serialized_null (== 0), - // or if we read some at other crazy offset, - // we will decode garbage and make wild references into the heap, - // leading to crashes in product mode. - // (This isn't airtight, of course, since there are internal - // offsets which are also crazy.) + // If we read nmethod::scopes_data at serialized_null (== 0) + // or if we read it at some other invalid offset, invalid values will be decoded. + // Based on these values, invalid heap locations could be referenced + // that could lead to crashes in product mode. + // Therefore, do not use the decode offset if invalid, but fill the frame + // as if it were a native compiled frame (no Java-level assumptions). #ifdef ASSERT if (WizardMode) { tty->print_cr("Error in fill_from_frame: pc_desc for " @@ -514,9 +514,15 @@ inline void vframeStreamCommon::fill_from_interpreter_frame() { address bcp = _frame.interpreter_frame_bcp(); int bci = method->validate_bci_from_bcp(bcp); // 6379830 AsyncGetCallTrace sometimes feeds us wild frames. + // AsyncGetCallTrace interrupts the VM asynchronously. As a result + // it is possible to access an interpreter frame for which + // no Java-level information is yet available (e.g., because + // the frame was being created when the VM interrupted it). + // In this scenario, pretend that the interpreter is at the point + // of entering the method. if (bci < 0) { found_bad_method_frame(); bci = 0; } _mode = interpreted_mode; _method = method; -- GitLab