/*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012, 2013 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/cppInterpreter.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterGenerator.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "oops/arrayOop.hpp"
#include "oops/methodData.hpp"
#include "oops/method.hpp"
#include "oops/oop.inline.hpp"
#include "prims/jvmtiExport.hpp"
#include "prims/jvmtiThreadState.hpp"
#include "runtime/arguments.hpp"
#include "runtime/deoptimization.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/synchronizer.hpp"
#include "runtime/timer.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/debug.hpp"
#ifdef SHARK
#include "shark/shark_globals.hpp"
#endif

#ifdef CC_INTERP

#define __ _masm->

// Contains is used for identifying interpreter frames during a stack-walk.
// A frame with a PC in InterpretMethod must be identified as a normal C frame.
bool CppInterpreter::contains(address pc) {
  return _code->contains(pc);
}

#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

static address interpreter_frame_manager        = NULL;
static address frame_manager_specialized_return = NULL;
static address native_entry                     = NULL;

static address interpreter_return_address       = NULL;

static address unctrap_frame_manager_entry      = NULL;

static address deopt_frame_manager_return_atos  = NULL;
static address deopt_frame_manager_return_btos  = NULL;
static address deopt_frame_manager_return_itos  = NULL;
static address deopt_frame_manager_return_ltos  = NULL;
static address deopt_frame_manager_return_ftos  = NULL;
static address deopt_frame_manager_return_dtos  = NULL;
static address deopt_frame_manager_return_vtos  = NULL;

// A result handler converts/unboxes a native call result into
// a java interpreter/compiler result. The current frame is an
// interpreter frame.
address CppInterpreterGenerator::generate_result_handler_for(BasicType type) {
  return AbstractInterpreterGenerator::generate_result_handler_for(type);
}
// tosca based result to c++ interpreter stack based result.
address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) {
  //
  // A result is in the native abi result register from a native
  // method call. We need to return this result to the interpreter by
  // pushing the result on the interpreter's stack.
  //
  // Registers alive:
  //   R3_ARG1(R3_RET)/F1_ARG1(F1_RET) - result to move
  //   R4_ARG2                         - address of tos
  //   LR
  //
  // Registers updated:
  //   R3_RET(R3_ARG1) - address of new tos (== R17_tos for T_VOID)
  //

  int number_of_used_slots = 1;

  const Register tos = R4_ARG2;
  Label done;
  Label is_false;

  address entry = __ pc();

  switch (type) {
  case T_BOOLEAN:
    __ cmpwi(CCR0, R3_RET, 0);
    __ beq(CCR0, is_false);
    __ li(R3_RET, 1);
    __ stw(R3_RET, 0, tos);
    __ b(done);
    __ bind(is_false);
    __ li(R3_RET, 0);
    __ stw(R3_RET, 0, tos);
    break;
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
  case T_INT:
    __ stw(R3_RET, 0, tos);
    break;
  case T_LONG:
    number_of_used_slots = 2;
    // mark unused slot for debugging
    // long goes to topmost slot
    __ std(R3_RET, -BytesPerWord, tos);
    __ li(R3_RET, 0);
    __ std(R3_RET, 0, tos);
    break;
  case T_OBJECT:
    __ verify_oop(R3_RET);
    __ std(R3_RET, 0, tos);
    break;
  case T_FLOAT:
    __ stfs(F1_RET, 0, tos);
    break;
  case T_DOUBLE:
    number_of_used_slots = 2;
    // mark unused slot for debugging
    __ li(R3_RET, 0);
    __ std(R3_RET, 0, tos);
    // double goes to topmost slot
    __ stfd(F1_RET, -BytesPerWord, tos);
    break;
  case T_VOID:
    number_of_used_slots = 0;
    break;
  default:
    ShouldNotReachHere();
  }

  __ BIND(done);

  // new expression stack top
  __ addi(R3_RET, tos, -BytesPerWord * number_of_used_slots);

  __ blr();

  return entry;
}

address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) {
  //
  // Copy the result from the callee's stack to the caller's stack,
  // caller and callee both being interpreted.
  //
  // Registers alive
  //   R3_ARG1 - address of callee's tos + BytesPerWord
  //   R4_ARG2 - address of caller's tos [i.e. free location]
  //   LR
  //
  //   stack grows upwards, memory grows downwards.
  //
  //   [      free         ]  <-- callee's tos
  //   [  optional result  ]  <-- R3_ARG1
  //   [  optional dummy   ]
  //          ...
  //   [      free         ]  <-- caller's tos, R4_ARG2
  //          ...
  //
  // Registers updated
  //   R3_RET(R3_ARG1) - address of caller's new tos
  //
  //   stack grows upwards, memory grows downwards.
  //
  //   [      free         ]  <-- current tos, R3_RET
  //   [  optional result  ]
  //   [  optional dummy   ]
  //          ...
  //

  const Register from = R3_ARG1;
  const Register ret  = R3_ARG1;
  const Register tos  = R4_ARG2;
  const Register tmp1 = R21_tmp1;
  const Register tmp2 = R22_tmp2;

  address entry = __ pc();

  switch (type) {
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
  case T_INT:
  case T_FLOAT:
    __ lwz(tmp1, 0, from);
    __ stw(tmp1, 0, tos);
    // New expression stack top.
    __ addi(ret, tos, - BytesPerWord);
    break;
  case T_LONG:
  case T_DOUBLE:
    // Move both entries for debug purposes even though only one is live.
    __ ld(tmp1, BytesPerWord, from);
    __ ld(tmp2, 0, from);
    __ std(tmp1, 0, tos);
    __ std(tmp2, -BytesPerWord, tos);
    // New expression stack top.
    __ addi(ret, tos, - 2 * BytesPerWord); // two slots
    break;
  case T_OBJECT:
    __ ld(tmp1, 0, from);
    __ verify_oop(tmp1);
    __ std(tmp1, 0, tos);
    // New expression stack top.
    __ addi(ret, tos, - BytesPerWord);
    break;
  case T_VOID:
    // New expression stack top.
    __ mr(ret, tos);
    break;
  default:
    ShouldNotReachHere();
  }

  __ blr();

  return entry;
}
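
// Illustrative sketch (not generated code): the slot handling in the two
// converters above, in C-like pseudocode. The expression stack grows
// towards lower addresses and `tos` points at the first free slot.
// Category-2 values (long/double) occupy two slots with the live value in
// the topmost (lowest-address) slot; the other slot is a dummy written
// only for debugging:
//
//   intptr_t* tosca_to_stack_long(intptr_t* tos, jlong result) {
//     tos[-1] = result;  // topmost slot holds the live value
//     tos[0]  = 0;       // dummy slot, for debugging only
//     return tos - 2;    // new tos: two slots consumed
//   }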
address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) {
  //
  // Load a result from the callee's stack into the caller's expecting
  // return register, callee being interpreted, caller being call stub
  // or jit code.
  //
  // Registers alive
  //   R3_ARG1 - callee expression tos + BytesPerWord
  //   LR
  //
  //   stack grows upwards, memory grows downwards.
  //
  //   [      free         ]  <-- callee's tos
  //   [  optional result  ]  <-- R3_ARG1
  //   [  optional dummy   ]
  //          ...
  //
  // Registers updated
  //   R3_RET(R3_ARG1)/F1_RET - result
  //

  const Register from = R3_ARG1;
  const Register ret  = R3_ARG1;
  const FloatRegister fret = F1_ARG1;

  address entry = __ pc();

  // Implemented uniformly for both kinds of endianness. The interpreter
  // implements boolean, byte, char, and short as jint (4 bytes).
  switch (type) {
  case T_BOOLEAN:
  case T_CHAR:
    // zero extension
    __ lwz(ret, 0, from);
    break;
  case T_BYTE:
  case T_SHORT:
  case T_INT:
    // sign extension
    __ lwa(ret, 0, from);
    break;
  case T_LONG:
    __ ld(ret, 0, from);
    break;
  case T_OBJECT:
    __ ld(ret, 0, from);
    __ verify_oop(ret);
    break;
  case T_FLOAT:
    __ lfs(fret, 0, from);
    break;
  case T_DOUBLE:
    __ lfd(fret, 0, from);
    break;
  case T_VOID:
    break;
  default:
    ShouldNotReachHere();
  }

  __ blr();

  return entry;
}

address CppInterpreter::return_entry(TosState state, int length, Bytecodes::Code code) {
  assert(interpreter_return_address != NULL, "Not initialized");
  return interpreter_return_address;
}

address CppInterpreter::deopt_entry(TosState state, int length) {
  address ret = NULL;
  if (length != 0) {
    switch (state) {
    case atos: ret = deopt_frame_manager_return_atos; break;
    case btos: ret = deopt_frame_manager_return_itos; break;
    case ctos:
    case stos:
    case itos: ret = deopt_frame_manager_return_itos; break;
    case ltos: ret = deopt_frame_manager_return_ltos; break;
    case ftos: ret = deopt_frame_manager_return_ftos; break;
    case dtos: ret = deopt_frame_manager_return_dtos; break;
    case vtos: ret = deopt_frame_manager_return_vtos; break;
    default: ShouldNotReachHere();
    }
  } else {
    ret = unctrap_frame_manager_entry; // re-execute the bytecode (e.g. uncommon trap, popframe)
  }
  assert(ret != NULL, "Not initialized");
  return ret;
}

//
// Helpers for commoning out cases in the various type of method entries.
//

//
// Registers alive
//   R16_thread     - JavaThread*
//   R1_SP          - old stack pointer
//   R19_method     - callee's Method
//   R17_tos        - address of caller's tos (prepushed)
//   R15_prev_state - address of caller's BytecodeInterpreter or 0
//   return_pc in R28_tmp8 (only when called within generate_native_entry)
//
// Registers updated
//   R14_state      - address of callee's interpreter state
//   R1_SP          - new stack pointer
//   CCR4_is_synced - current method is synchronized
//
void CppInterpreterGenerator::generate_compute_interpreter_state(Label& stack_overflow_return) {
  //
  // Stack layout at this point:
  //
  //   F1      [TOP_IJAVA_FRAME_ABI]              <-- R1_SP
  //           alignment (optional)
  //           [F1's outgoing Java arguments]     <-- R17_tos
  //           ...
  //   F2      [PARENT_IJAVA_FRAME_ABI]
  //           ...

  //=============================================================================
  // Allocate space for locals other than the parameters, the
  // interpreter state, monitors, and the expression stack.

  const Register local_count     = R21_tmp1;
  const Register parameter_count = R22_tmp2;
  const Register max_stack       = R23_tmp3; // Must not be overwritten within this method!
  // const Register return_pc = R29_tmp9;

  const ConditionRegister is_synced = CCR4_is_synced;
  const ConditionRegister is_native = CCR6;
  const ConditionRegister is_static = CCR7;

  assert(is_synced != is_native, "condition code registers must be distinct");
  assert(is_synced != is_static, "condition code registers must be distinct");
  assert(is_native != is_static, "condition code registers must be distinct");

  { // Local registers
    const Register top_frame_size  = R24_tmp4;
    const Register access_flags    = R25_tmp5;
    const Register state_offset    = R26_tmp6;
    Register       mem_stack_limit = R27_tmp7;
    const Register page_size       = R28_tmp8;

    BLOCK_COMMENT("compute_interpreter_state {");

    // access_flags = method->access_flags();
    // TODO: PPC port: assert(4 == methodOopDesc::sz_access_flags(), "unexpected field size");
    __ lwa(access_flags, method_(access_flags));

    // parameter_count = method->constMethod->size_of_parameters();
    // TODO: PPC port: assert(2 == ConstMethod::sz_size_of_parameters(), "unexpected field size");
    __ ld(max_stack, in_bytes(Method::const_offset()), R19_method); // Max_stack holds constMethod for a while.
    __ lhz(parameter_count, in_bytes(ConstMethod::size_of_parameters_offset()), max_stack);

    // local_count = method->constMethod()->max_locals();
    // TODO: PPC port: assert(2 == ConstMethod::sz_max_locals(), "unexpected field size");
    __ lhz(local_count, in_bytes(ConstMethod::size_of_locals_offset()), max_stack);

    // max_stack = method->constMethod()->max_stack();
    // TODO: PPC port: assert(2 == ConstMethod::sz_max_stack(), "unexpected field size");
    __ lhz(max_stack, in_bytes(ConstMethod::max_stack_offset()), max_stack);

    if (EnableInvokeDynamic) {
      // Take into account 'extra_stack_entries' needed by method handles (see method.hpp).
      __ addi(max_stack, max_stack, Method::extra_stack_entries());
    }

    // mem_stack_limit = thread->stack_limit();
    __ ld(mem_stack_limit, thread_(stack_overflow_limit));

    // Point locals at the first argument. Method's locals are the
    // parameters on top of caller's expression stack.

    // tos points past last Java argument
    __ sldi(R18_locals, parameter_count, Interpreter::logStackElementSize);
    __ add(R18_locals, R17_tos, R18_locals);

    // R18_locals - i*BytesPerWord points to i-th Java local (i starts at 0)

    // Set is_native, is_synced, is_static - will be used later.
    __ testbitdi(is_native, R0, access_flags, JVM_ACC_NATIVE_BIT);
    __ testbitdi(is_synced, R0, access_flags, JVM_ACC_SYNCHRONIZED_BIT);
    assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile");
    __ testbitdi(is_static, R0, access_flags, JVM_ACC_STATIC_BIT);

    // PARENT_IJAVA_FRAME_ABI
    //
    // frame_size =
    //   round_to((local_count - parameter_count)*BytesPerWord +
    //            2*BytesPerWord +
    //            alignment +
    //            frame::interpreter_frame_cinterpreterstate_size_in_bytes() +
    //            sizeof(PARENT_IJAVA_FRAME_ABI) +
    //            method->is_synchronized() ? sizeof(BasicObjectLock) : 0 +
    //            max_stack*BytesPerWord,
    //            16)
    //
    // Note that this calculation is exactly mirrored by
    // AbstractInterpreter::layout_activation_impl() [ and
    // AbstractInterpreter::size_activation() ]. Which is used by
    // deoptimization so that it can allocate the proper sized
    // frame. This only happens for interpreted frames so the extra
    // notes about max_stack below are not important. The other
    // thing to note is that for interpreter frames other than the
    // current activation the size of the stack is the size of the live
    // portion of the stack at the particular bcp and NOT the maximum
    // stack that the method might use.
    //
    // If we're calling a native method, we replace max_stack (which is
    // zero) with space for the worst-case signature handler varargs
    // vector, which is:
    //
    //   max_stack = max(Argument::n_register_parameters, parameter_count+2);
    //
    // We add two slots to the parameter_count, one for the jni
    // environment and one for a possible native mirror. We allocate
    // space for at least the number of ABI registers, even though
    // InterpreterRuntime::slow_signature_handler won't write more than
    // parameter_count+2 words when it creates the varargs vector at the
    // top of the stack. The generated slow signature handler will just
    // load trash into registers beyond the necessary number. We're
    // still going to cut the stack back by the ABI register parameter
    // count so as to get SP+16 pointing at the ABI outgoing parameter
    // area, so we need to allocate at least that much even though we're
    // going to throw it away.
    //

    // Adjust max_stack for native methods:
    Label skip_native_calculate_max_stack;
    __ bfalse(is_native, skip_native_calculate_max_stack);
    // if (is_native) {
    //   max_stack = max(Argument::n_register_parameters, parameter_count+2);
    __ addi(max_stack, parameter_count, 2*Interpreter::stackElementWords);
    __ cmpwi(CCR0, max_stack, Argument::n_register_parameters);
    __ bge(CCR0, skip_native_calculate_max_stack);
    __ li(max_stack, Argument::n_register_parameters);
    // }
    __ bind(skip_native_calculate_max_stack);
    // max_stack is now in bytes
    __ slwi(max_stack, max_stack, Interpreter::logStackElementSize);

    // Calculate number of non-parameter locals (in slots):
    Label not_java;
    __ btrue(is_native, not_java);
    // if (!is_native) {
    //   local_count = non-parameter local count
    __ sub(local_count, local_count, parameter_count);
    // } else {
    //   // nothing to do: method->max_locals() == 0 for native methods
    // }
    __ bind(not_java);

    // Calculate top_frame_size and parent_frame_resize.
    {
      const Register parent_frame_resize = R12_scratch2;

      BLOCK_COMMENT("Compute top_frame_size.");

      // top_frame_size = TOP_IJAVA_FRAME_ABI
      //                  + size of interpreter state
      __ li(top_frame_size, frame::top_ijava_frame_abi_size
                            + frame::interpreter_frame_cinterpreterstate_size_in_bytes());

      //                  + max_stack
      __ add(top_frame_size, top_frame_size, max_stack);

      //                  + stack slots for a BasicObjectLock for synchronized methods
      {
        Label not_synced;
        __ bfalse(is_synced, not_synced);
        __ addi(top_frame_size, top_frame_size, frame::interpreter_frame_monitor_size_in_bytes());
        __ bind(not_synced);
      }

      // align
      __ round_to(top_frame_size, frame::alignment_in_bytes);

      BLOCK_COMMENT("Compute parent_frame_resize.");

      // parent_frame_resize = R1_SP - R17_tos
      __ sub(parent_frame_resize, R1_SP, R17_tos);
      //__ li(parent_frame_resize, 0);

      //   + PARENT_IJAVA_FRAME_ABI
      //   + extra two slots for the no-parameter/no-locals
      //     method result
      __ addi(parent_frame_resize, parent_frame_resize,
              frame::parent_ijava_frame_abi_size
              + 2*Interpreter::stackElementSize);
      //   + (locals_count - params_count)
      __ sldi(R0, local_count, Interpreter::logStackElementSize);
      __ add(parent_frame_resize, parent_frame_resize, R0);

      // align
      __ round_to(parent_frame_resize, frame::alignment_in_bytes);
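
      // Illustrative sketch (not generated code): the two sizes computed
      // above, in C-like pseudocode, with all quantities in bytes:
      //
      //   top_frame_size =
      //     round_to(frame::top_ijava_frame_abi_size
      //              + frame::interpreter_frame_cinterpreterstate_size_in_bytes()
      //              + max_stack
      //              + (is_synced ? frame::interpreter_frame_monitor_size_in_bytes() : 0),
      //              frame::alignment_in_bytes);
      //
      //   parent_frame_resize =
      //     round_to((R1_SP - R17_tos)                  // caller's outgoing args
      //              + frame::parent_ijava_frame_abi_size
      //              + 2*Interpreter::stackElementSize  // result slots
      //              + non_parameter_locals_in_bytes,
      //              frame::alignment_in_bytes);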
      //
      // Stack layout at this point:
      //
      // The new frame F0 hasn't yet been pushed, F1 is still the top frame.
      //
      //   F0      [TOP_IJAVA_FRAME_ABI]
      //           alignment (optional)
      //           [F0's full operand stack]
      //           [F0's monitors] (optional)
      //           [F0's BytecodeInterpreter object]
      //   F1      [PARENT_IJAVA_FRAME_ABI]
      //           alignment (optional)
      //           [F0's Java result]
      //           [F0's non-arg Java locals]
      //           [F1's outgoing Java arguments]     <-- R17_tos
      //           ...
      //   F2      [PARENT_IJAVA_FRAME_ABI]
      //           ...

      // Calculate new R14_state
      // and
      // test that the new memory stack pointer is above the limit,
      // throw a StackOverflowError otherwise.
      __ sub(R11_scratch1/*F1's SP*/, R1_SP, parent_frame_resize);
      __ addi(R14_state, R11_scratch1/*F1's SP*/,
              -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
      __ sub(R11_scratch1/*F0's SP*/,
             R11_scratch1/*F1's SP*/, top_frame_size);

      BLOCK_COMMENT("Test for stack overflow:");
      __ cmpld(CCR0/*is_stack_overflow*/, R11_scratch1, mem_stack_limit);
      __ blt(CCR0/*is_stack_overflow*/, stack_overflow_return);

      //=============================================================================
      // Frame_size doesn't overflow the stack. Allocate new frame and
      // initialize interpreter state.

      // Register state
      //
      //   R15            - local_count
      //   R16            - parameter_count
      //   R17            - max_stack
      //
      //   R18            - frame_size
      //   R19            - access_flags
      //   CCR4_is_synced - is_synced
      //
      //   GR_Lstate      - pointer to the uninitialized new BytecodeInterpreter.

      // _last_Java_pc just needs to be close enough that we can identify
      // the frame as an interpreted frame. It does not need to be the
      // exact return address from either calling
      // BytecodeInterpreter::InterpretMethod or the call to a jni native method.
      // So we can initialize it here with a value of a bundle in this
      // code fragment. We only do this initialization for java frames
      // where InterpretMethod needs a way to get a good pc value to
      // store in the thread state. For interpreter frames used to call
      // jni native code we just zero the value in the state and move an
      // ip as needed in the native entry code.
      //
      // const Register last_Java_pc_addr = GR24_SCRATCH; // QQQ 27
      // const Register last_Java_pc      = GR26_SCRATCH;

      // Must reference stack before setting new SP since Windows
      // will not be able to deliver the exception on a bad SP.
      // Windows also insists that we bang each page one at a time in order
      // for the OS to map in the reserved pages. If we bang only
      // the final page, Windows stops delivering exceptions to our
      // VectoredExceptionHandler and terminates our program.
      // Linux only requires a single bang but it's rare to have
      // to bang more than 1 page so the code is enabled for both OS's.

      // BANG THE STACK
      //
      // Nothing to do for PPC, because updating the SP will automatically
      // bang the page.

      // Up to here we have calculated the delta for the new C-frame and
      // checked for a stack-overflow. Now we can safely update SP and
      // resize the C-frame.

      // R14_state has already been calculated.
      __ push_interpreter_frame(top_frame_size, parent_frame_resize,
                                R25_tmp5, R26_tmp6, R27_tmp7, R28_tmp8);
    }

    //
    // Stack layout at this point:
    //
    //   F0 has been pushed!
    //
    //   F0      [TOP_IJAVA_FRAME_ABI]              <-- R1_SP
    //           alignment (optional)               (now it's here, if required)
    //           [F0's full operand stack]
    //           [F0's monitors] (optional)
    //           [F0's BytecodeInterpreter object]
    //   F1      [PARENT_IJAVA_FRAME_ABI]
    //           alignment (optional)               (now it's here, if required)
    //           [F0's Java result]
    //           [F0's non-arg Java locals]
    //           [F1's outgoing Java arguments]
    //           ...
    //   F2      [PARENT_IJAVA_FRAME_ABI]
    //           ...
    //
    // R14_state points to F0's BytecodeInterpreter object.
    //
  }

  //=============================================================================
  // new BytecodeInterpreter-object is safe, let's initialize it:
  BLOCK_COMMENT("New BytecodeInterpreter-object is safe.");

  {
    // Locals
    const Register bytecode_addr = R24_tmp4;
    const Register constants     = R25_tmp5;
    const Register tos           = R26_tmp6;
    const Register stack_base    = R27_tmp7;
    const Register local_addr    = R28_tmp8;

    {
      Label L;
      __ btrue(is_native, L);
      // if (!is_native) {
      //   bytecode_addr = constMethod->codes();
      __ ld(bytecode_addr, method_(const));
      __ addi(bytecode_addr, bytecode_addr, in_bytes(ConstMethod::codes_offset()));
      // }
      __ bind(L);
    }

    __ ld(constants, in_bytes(Method::const_offset()), R19_method);
    __ ld(constants, in_bytes(ConstMethod::constants_offset()), constants);

    // state->_prev_link = prev_state;
    __ std(R15_prev_state, state_(_prev_link));

    // For assertions only.
    // TODO: not needed anyway because it coincides with `_monitor_base'. remove!
    // state->_self_link = state;
    DEBUG_ONLY(__ std(R14_state, state_(_self_link));)

    // state->_thread = thread;
    __ std(R16_thread, state_(_thread));

    // state->_method = method;
    __ std(R19_method, state_(_method));

    // state->_locals = locals;
    __ std(R18_locals, state_(_locals));

    // state->_oop_temp = NULL;
    __ li(R0, 0);
    __ std(R0, state_(_oop_temp));

    // state->_last_Java_fp = *R1_SP
    // Use *R1_SP as fp
    __ ld(R0, _abi(callers_sp), R1_SP);
    __ std(R0, state_(_last_Java_fp));

    BLOCK_COMMENT("load Stack base:");
    {
      // Stack_base.
      // if (!method->synchronized()) {
      //   stack_base = state;
      // } else {
      //   stack_base = (uintptr_t)state - sizeof(BasicObjectLock);
      // }
      Label L;
      __ mr(stack_base, R14_state);
      __ bfalse(is_synced, L);
      __ addi(stack_base, stack_base, -frame::interpreter_frame_monitor_size_in_bytes());
      __ bind(L);
    }

    // state->_mdx = NULL;
    __ li(R0, 0);
    __ std(R0, state_(_mdx));

    {
      // if (method->is_native()) state->_bcp = NULL;
      // else state->_bcp = bytecode_addr;
      Label label1, label2;
      __ bfalse(is_native, label1);
      __ std(R0, state_(_bcp));
      __ b(label2);
      __ bind(label1);
      __ std(bytecode_addr, state_(_bcp));
      __ bind(label2);
    }

    // state->_result._to_call._callee = NULL;
    __ std(R0, state_(_result._to_call._callee));

    // state->_monitor_base = state;
    __ std(R14_state, state_(_monitor_base));

    // state->_msg = BytecodeInterpreter::method_entry;
    __ li(R0, BytecodeInterpreter::method_entry);
    __ stw(R0, state_(_msg));

    // state->_last_Java_sp = R1_SP;
    __ std(R1_SP, state_(_last_Java_sp));

    // state->_stack_base = stack_base;
    __ std(stack_base, state_(_stack_base));

    // tos = stack_base - 1 slot (prepushed);
    // state->_stack.Tos(tos);
    __ addi(tos, stack_base, - Interpreter::stackElementSize);
    __ std(tos, state_(_stack));

    {
      BLOCK_COMMENT("get last_Java_pc:");
      // if (!is_native) state->_last_Java_pc = ;
      // else state->_last_Java_pc = NULL; (just for neatness)
      Label label1, label2;
      __ btrue(is_native, label1);
      __ get_PC_trash_LR(R0);
      __ std(R0, state_(_last_Java_pc));
      __ b(label2);
      __ bind(label1);
      __ li(R0, 0);
      __ std(R0, state_(_last_Java_pc));
      __ bind(label2);
    }

    // stack_limit = tos - max_stack;
    __ sub(R0, tos, max_stack);
    // state->_stack_limit = stack_limit;
    __ std(R0, state_(_stack_limit));

    // cache = method->constants()->cache();
    __ ld(R0, ConstantPool::cache_offset_in_bytes(), constants);
    // state->_constants = method->constants()->cache();
    __ std(R0, state_(_constants));

    //=============================================================================
    // synchronized method, allocate and initialize method object lock.
    // if (!method->is_synchronized()) goto fill_locals_with_0x0s;
    Label fill_locals_with_0x0s;
    __ bfalse(is_synced, fill_locals_with_0x0s);

    // pool_holder = method->constants()->pool_holder();
    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
    {
      Label label1, label2;
      // lockee = NULL; for java methods, correct value will be inserted in BytecodeInterpretMethod.hpp
      __ li(R0, 0);
      __ bfalse(is_native, label2);

      __ bfalse(is_static, label1);
      // if (method->is_static()) lockee =
      // pool_holder->klass_part()->java_mirror();
      __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(), constants);
      __ ld(R0/*lockee*/, mirror_offset, R11_scratch1/*pool_holder*/);
      __ b(label2);

      __ bind(label1);
      // else lockee = *(oop*)locals;
      __ ld(R0/*lockee*/, 0, R18_locals);
      __ bind(label2);

      // monitor->set_obj(lockee);
      __ std(R0/*lockee*/, BasicObjectLock::obj_offset_in_bytes(), stack_base);
    }

    // See if we need to zero the locals
    __ BIND(fill_locals_with_0x0s);

    //=============================================================================
    // fill locals with 0x0s
    Label locals_zeroed;
    __ btrue(is_native, locals_zeroed);

    if (true /* zerolocals */ || ClearInterpreterLocals) {
      // local_count is already num_locals_slots - num_param_slots
      __ sldi(R0, parameter_count, Interpreter::logStackElementSize);
      __ sub(local_addr, R18_locals, R0);
      __ cmpdi(CCR0, local_count, 0);
      __ ble(CCR0, locals_zeroed);

      __ mtctr(local_count);
      //__ ld_const_addr(R0, (address) 0xcafe0000babe);
      __ li(R0, 0);

      Label zero_slot;
      __ bind(zero_slot);

      // first local is at local_addr
      __ std(R0, 0, local_addr);
      __ addi(local_addr, local_addr, -BytesPerWord);
      __ bdnz(zero_slot);
    }

    __ BIND(locals_zeroed);

  }

  BLOCK_COMMENT("} compute_interpreter_state");
}

// Generate code to initiate compilation on invocation counter overflow.
void CppInterpreterGenerator::generate_counter_overflow(Label& continue_entry) {

  // Registers alive
  //   R14_state
  //   R16_thread
  //
  // Registers updated
  //   R14_state
  //   R3_ARG1 (=R3_RET)
  //   R4_ARG2

  // After entering the vm we remove the activation and retry the
  // entry point in case the compilation is complete.

  // InterpreterRuntime::frequency_counter_overflow takes one argument
  // that indicates if the counter overflow occurs at a backwards
  // branch (NULL bcp). We pass zero. The call returns the address
  // of the verified entry point for the method or NULL if the
  // compilation did not complete (either went background or bailed
  // out).
  __ li(R4_ARG2, 0);

  // Pass false to call_VM so it doesn't check for pending exceptions,
  // since at this point in the method invocation the exception
  // handler would try to exit the monitor of synchronized methods
  // which haven't been entered yet.
  //
  // Returns verified_entry_point or NULL, we don't care which.
  //
  // Do not use the variant `frequency_counter_overflow' that returns
  // a structure, because this will change the argument list by a
  // hidden parameter (gcc 4.1).
  __ call_VM(noreg,
             CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow),
             R4_ARG2,
             false);

  // Returns verified_entry_point or NULL, we don't care which as we ignore it
  // and run interpreted.

  // Reload method, it may have moved.
  __ ld(R19_method, state_(_method));

  // We jump now to the label "continue_after_compile".
  __ b(continue_entry);
}
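
// Illustrative sketch (not generated code): the overflow protocol above in
// C-like pseudocode. The returned verified entry point is deliberately
// ignored here, because the frame manager just retries the method entry
// at continue_entry after the VM call:
//
//   void on_counter_overflow(JavaThread* thread) {
//     // bcp == NULL signals "overflow at method entry, not at a branch"
//     address verified_entry =
//         InterpreterRuntime::frequency_counter_overflow(thread, NULL);
//     // verified_entry may be NULL (compilation went background or bailed
//     // out); either way execution continues interpreted.
//   }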
// Increment invocation count and check for overflow.
//
// R19_method must contain Method* of method to profile.
void CppInterpreterGenerator::generate_counter_incr(Label& overflow) {
  Label done;
  const Register Rcounters             = R12_scratch2;
  const Register iv_be_count           = R11_scratch1;
  const Register invocation_limit      = R12_scratch2;
  const Register invocation_limit_addr = invocation_limit;

  // Load and, if necessary, allocate MethodCounters object.
  __ get_method_counters(R19_method, Rcounters, done);

  // Update standard invocation counters.
  __ increment_invocation_counter(Rcounters, iv_be_count, R0);

  // Compare against limit.
  BLOCK_COMMENT("Compare counter against limit:");
  assert(4 == sizeof(InvocationCounter::InterpreterInvocationLimit),
         "must be 4 bytes");
  __ load_const(invocation_limit_addr, (address)&InvocationCounter::InterpreterInvocationLimit);
  __ lwa(invocation_limit, 0, invocation_limit_addr);
  __ cmpw(CCR0, iv_be_count, invocation_limit);
  __ bge(CCR0, overflow);
  __ bind(done);
}

//
// Call a JNI method.
//
// Interpreter stub for calling a native method. (C++ interpreter)
// This sets up a somewhat different looking stack for calling the native method
// than the typical interpreter frame setup.
//
address CppInterpreterGenerator::generate_native_entry(void) {
  if (native_entry != NULL) return native_entry;
  address entry = __ pc();

  // Read
  //   R16_thread
  //   R15_prev_state - address of caller's BytecodeInterpreter, if this snippet
  //                    gets called by the frame manager.
  //   R19_method     - callee's Method
  //   R17_tos        - address of caller's tos
  //   R1_SP          - caller's stack pointer
  //   R21_sender_SP  - initial caller sp
  //
  // Update
  //   R14_state      - address of caller's BytecodeInterpreter
  //   R3_RET         - integer result, if any.
  //   F1_RET         - float result, if any.
  //
  //
  // Stack layout at this point:
  //
  //    0       [TOP_IJAVA_FRAME_ABI]         <-- R1_SP
  //            alignment (optional)
  //            [outgoing Java arguments]     <-- R17_tos
  //            ...
  //    PARENT  [PARENT_IJAVA_FRAME_ABI]
  //            ...
  //

  const bool inc_counter = UseCompiler || CountCompiledCalls;

  const Register signature_handler_fd = R21_tmp1;
  const Register pending_exception    = R22_tmp2;
  const Register result_handler_addr  = R23_tmp3;
  const Register native_method_fd     = R24_tmp4;
  const Register access_flags         = R25_tmp5;
  const Register active_handles       = R26_tmp6;
  const Register sync_state           = R27_tmp7;
  const Register sync_state_addr      = sync_state; // Address is dead after use.
  const Register suspend_flags        = R24_tmp4;

  const Register return_pc            = R28_tmp8; // Register will be locked for some time.

  const ConditionRegister is_synced   = CCR4_is_synced; // Live-on-exit from compute_interpreter_state.

  // R1_SP still points to caller's SP at this point.

  // Save initial_caller_sp to caller's abi. The caller frame must be
  // resized before returning to get rid of the c2i arguments (if
  // any).
  // Override the saved SP with the senderSP so we can pop c2i
  // arguments (if any) off when we return.
  __ std(R21_sender_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);

  // Save LR to caller's frame. We don't use _abi(lr) here, because it is not safe.
  __ mflr(return_pc);
  __ std(return_pc, _top_ijava_frame_abi(frame_manager_lr), R1_SP);

  assert(return_pc->is_nonvolatile(), "return_pc must be a non-volatile register");

  __ verify_method_ptr(R19_method);

  //=============================================================================
  // If this snippet gets called by the frame manager (at label
  // `call_special'), then R15_prev_state is valid. If this snippet
  // is not called by the frame manager, but e.g. by the call stub or
  // by compiled code, then R15_prev_state is invalid.
  {
    // Set R15_prev_state to 0 if we don't return to the frame
    // manager; we will return to the call_stub or to compiled code
    // instead. If R15_prev_state is 0 there will be only one
    // interpreter frame (we will set this up later) in this C frame!
    // So we must take care to retrieve prev_state_(_prev_link)
    // and to restore R1_SP when popping that interpreter.

    Label prev_state_is_valid;

    __ load_const(R11_scratch1/*frame_manager_returnpc_addr*/, (address)&frame_manager_specialized_return);
    __ ld(R12_scratch2/*frame_manager_returnpc*/, 0, R11_scratch1/*frame_manager_returnpc_addr*/);
    __ cmpd(CCR0, return_pc, R12_scratch2/*frame_manager_returnpc*/);
    __ beq(CCR0, prev_state_is_valid);

    __ li(R15_prev_state, 0);

    __ BIND(prev_state_is_valid);
  }

  //=============================================================================
  // Allocate new frame and initialize interpreter state.

  Label exception_return;
  Label exception_return_sync_check;
  Label stack_overflow_return;

  // Generate new interpreter state and jump to stack_overflow_return in case of
  // a stack overflow.
  generate_compute_interpreter_state(stack_overflow_return);

  //=============================================================================
  // Increment invocation counter. On overflow, entry to JNI method
  // will be compiled.
  Label invocation_counter_overflow;
  if (inc_counter) {
    generate_counter_incr(invocation_counter_overflow);
  }

  Label continue_after_compile;
  __ BIND(continue_after_compile);

  // access_flags = method->access_flags();
  // Load access flags.
  assert(access_flags->is_nonvolatile(),
         "access_flags must be in a non-volatile register");
  // Type check.
  // TODO: PPC port: assert(4 == methodOopDesc::sz_access_flags(), "unexpected field size");
  __ lwz(access_flags, method_(access_flags));

  // We don't want to reload R19_method and access_flags after calls
  // to some helper functions.
  assert(R19_method->is_nonvolatile(), "R19_method must be a non-volatile register");

  // Check for synchronized methods. Must happen AFTER invocation counter
  // check, so method is not locked if counter overflows.
  {
    Label method_is_not_synced;
    // Is_synced is still alive.
    assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile");
    __ bfalse(is_synced, method_is_not_synced);

    lock_method();

    // Reload method, it may have moved.
    __ ld(R19_method, state_(_method));

    __ BIND(method_is_not_synced);
  }

  // jvmti/jvmpi support
  __ notify_method_entry();

  // Reload method, it may have moved.
  __ ld(R19_method, state_(_method));

  //=============================================================================
  // Get and call the signature handler

  __ ld(signature_handler_fd, method_(signature_handler));
  Label call_signature_handler;

  __ cmpdi(CCR0, signature_handler_fd, 0);
  __ bne(CCR0, call_signature_handler);

  // Method has never been called. Either generate a specialized
  // handler or point to the slow one.
  //
  // Pass parameter 'false' to avoid exception check in call_VM.
  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), R19_method, false);

  // Check for an exception while looking up the target method. If we
  // incurred one, bail.
  __ ld(pending_exception, thread_(pending_exception));
  __ cmpdi(CCR0, pending_exception, 0);
  __ bne(CCR0, exception_return_sync_check); // has pending exception

  // reload method
  __ ld(R19_method, state_(_method));

  // Reload signature handler, it may have been created/assigned in the meanwhile
  __ ld(signature_handler_fd, method_(signature_handler));

  __ BIND(call_signature_handler);

  // Before we call the signature handler we push a new frame to
  // protect the interpreter frame volatile registers when we return
  // from jni but before we can get back to Java.

  // First set the frame anchor while the SP/FP registers are
  // convenient and the slow signature handler can use this same frame
  // anchor.

  // We have a TOP_IJAVA_FRAME here, which belongs to us.
  __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R12_scratch2/*tmp*/);

  // Now the interpreter frame (and its call chain) have been
  // invalidated and flushed. We are now protected against eager
  // being enabled in native code. Even if it goes eager the
  // registers will be reloaded as clean and we will invalidate after
  // the call so no spurious flush should be possible.

  // Call signature handler and pass locals address.
  //
  // Our signature handlers copy required arguments to the C stack
  // (outgoing C args), R3_ARG1 to R10_ARG8, and F1_ARG1 to
  // F13_ARG13.
  __ mr(R3_ARG1, R18_locals);
#if !defined(ABI_ELFv2)
  __ ld(signature_handler_fd, 0, signature_handler_fd);
#endif
  __ call_stub(signature_handler_fd);

  // reload method
  __ ld(R19_method, state_(_method));

  // Remove the register parameter varargs slots we allocated in
  // compute_interpreter_state. SP+16 ends up pointing to the ABI
  // outgoing argument area.
  //
  // Not needed on PPC64.
  //__ add(SP, SP, Argument::n_register_parameters*BytesPerWord);

  assert(result_handler_addr->is_nonvolatile(), "result_handler_addr must be in a non-volatile register");
  // Save across call to native method.
  __ mr(result_handler_addr, R3_RET);

  // Set up fixed parameters and call the native method.
  // If the method is static, get mirror into R4_ARG2.

  {
    Label method_is_not_static;
    // access_flags is non-volatile and still valid, no need to reload it.
    __ testbitdi(CCR0, R0, access_flags, JVM_ACC_STATIC_BIT);
    __ bfalse(CCR0, method_is_not_static);

    // constants = method->constants();
    __ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
    __ ld(R11_scratch1/*constants*/, in_bytes(ConstMethod::constants_offset()), R11_scratch1);
    // pool_holder = method->constants()->pool_holder();
    __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(),
          R11_scratch1/*constants*/);

    const int mirror_offset = in_bytes(Klass::java_mirror_offset());

    // mirror = pool_holder->klass_part()->java_mirror();
    __ ld(R0/*mirror*/, mirror_offset, R11_scratch1/*pool_holder*/);
    // state->_native_mirror = mirror;
    __ std(R0/*mirror*/, state_(_oop_temp));
    // R4_ARG2 = &state->_oop_temp;
    __ addir(R4_ARG2, state_(_oop_temp));

    __ BIND(method_is_not_static);
  }

  // At this point, arguments have been copied off the stack into
  // their JNI positions. Oops are boxed in-place on the stack, with
  // handles copied to arguments. The result handler address is in a
  // register.

  // pass JNIEnv address as first parameter
  __ addir(R3_ARG1, thread_(jni_environment));

  // Load the native_method entry before we change the thread state.
  __ ld(native_method_fd, method_(native_function));
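
  // Illustrative sketch (not generated code): the fixed-parameter setup
  // above follows the standard JNI calling convention. For a hypothetical
  // static native `static native int f(int x)` the call made below is
  // roughly:
  //
  //   JNIEnv* env    = thread->jni_environment();  // first parameter, R3_ARG1
  //   jobject mirror = &state->_oop_temp;          // handle to class mirror,
  //                                                // second parameter (static only)
  //   jint result    = ((jint (*)(JNIEnv*, jobject, jint))native_function)
  //                        (env, mirror, x);
  //
  // For non-static methods the second parameter is the receiver handle
  // instead, produced by the signature handler.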
  //=============================================================================
  // Transition from _thread_in_Java to _thread_in_native. As soon as
  // we make this change the safepoint code needs to be certain that
  // the last Java frame we established is good. The pc in that frame
  // just needs to be near here not an actual return address.

  // We use release_store_fence to update values like the thread state, where
  // we don't want the current thread to continue until all our prior memory
  // accesses (including the new thread state) are visible to other threads.
  __ li(R0, _thread_in_native);
  __ release();

  // TODO: PPC port: assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
  __ stw(R0, thread_(thread_state));

  if (UseMembar) {
    __ fence();
  }

  //=============================================================================
  // Call the native method. Argument registers must not have been
  // overwritten since "__ call_stub(signature_handler);" (except for
  // ARG1 and ARG2 for static methods).
  __ call_c(native_method_fd);
  __ std(R3_RET, state_(_native_lresult));
  __ stfd(F1_RET, state_(_native_fresult));

  // The frame_manager_lr field, which we use for setting the last
  // java frame, gets overwritten by the signature handler. Restore
  // it now.
  __ get_PC_trash_LR(R11_scratch1);
  __ std(R11_scratch1, _top_ijava_frame_abi(frame_manager_lr), R1_SP);

  // Because of GC R19_method may no longer be valid.

  // Block, if necessary, before resuming in _thread_in_Java state.
  // In order for GC to work, don't clear the last_Java_sp until after
  // blocking.

  //=============================================================================
  // Switch thread to "native transition" state before reading the
  // synchronization state. This additional state is necessary
  // because reading and testing the synchronization state is not
  // atomic w.r.t. GC, as this scenario demonstrates: Java thread A,
  // in _thread_in_native state, loads _not_synchronized and is
  // preempted. VM thread changes sync state to synchronizing and
  // suspends threads for GC. Thread A is resumed to finish this
  // native method, but doesn't block here since it didn't see any
  // synchronization in progress, and escapes.

  // We use release_store_fence to update values like the thread state, where
  // we don't want the current thread to continue until all our prior memory
  // accesses (including the new thread state) are visible to other threads.
  __ li(R0/*thread_state*/, _thread_in_native_trans);
  __ release();
  __ stw(R0/*thread_state*/, thread_(thread_state));

  if (UseMembar) {
    __ fence();
  }
  // Write serialization page so that the VM thread can do a pseudo remote
  // membar. We use the current thread pointer to calculate a thread
  // specific offset to write to within the page. This minimizes bus
  // traffic due to cache line collision.
  else {
    __ serialize_memory(R16_thread, R11_scratch1, R12_scratch2);
  }
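
  // Illustrative sketch (not generated code): the thread-state protocol
  // around the native call, in C-like pseudocode. The extra
  // _thread_in_native_trans state closes the race described in the comment
  // block above; the safepoint check itself follows below:
  //
  //   thread->set_state(_thread_in_native);        // release-store
  //   result = (*native_function)(...);
  //   thread->set_state(_thread_in_native_trans);  // release-store
  //   fence_or_serialize_page();                   // make store visible to VM thread
  //   if (safepoint_in_progress() || thread->suspend_flags() != 0) {
  //     JavaThread::check_special_condition_for_native_trans(thread); // may block
  //   }
  //   thread->set_state(_thread_in_Java);          // release-store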
  // Now before we return to java we must look for a current safepoint
  // (a new safepoint can not start since we entered native_trans).
  // We must check here because a current safepoint could be modifying
  // the caller's registers right this moment.

  // Acquire isn't strictly necessary here because of the fence, but
  // sync_state is declared to be volatile, so we do it anyway.
  __ load_const(sync_state_addr, SafepointSynchronize::address_of_state());

  // TODO: PPC port: assert(4 == SafepointSynchronize::sz_state(), "unexpected field size");
  __ lwz(sync_state, 0, sync_state_addr);

  // TODO: PPC port: assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
  __ lwz(suspend_flags, thread_(suspend_flags));

  __ acquire();

  Label sync_check_done;
  Label do_safepoint;
  // No synchronization in progress nor yet synchronized
  __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
  // not suspended
  __ cmpwi(CCR1, suspend_flags, 0);

  __ bne(CCR0, do_safepoint);
  __ beq(CCR1, sync_check_done);
  __ bind(do_safepoint);
  // Block. We do the call directly and leave the current
  // last_Java_frame setup undisturbed. We must save any possible
  // native result across the call. No oop is present.

  __ mr(R3_ARG1, R16_thread);
#if defined(ABI_ELFv2)
  __ call_c(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
            relocInfo::none);
#else
  __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, JavaThread::check_special_condition_for_native_trans),
            relocInfo::none);
#endif
  __ bind(sync_check_done);

  //=============================================================================
  // <<<<<< Back in Interpreter Frame >>>>>

  // We are in thread_in_native_trans here and back in the normal
  // interpreter frame. We don't have to do anything special about
  // safepoints and we can switch to Java mode anytime we are ready.

  // Note: frame::interpreter_frame_result has a dependency on how the
  // method result is saved across the call to post_method_exit. For
  // native methods it assumes that the non-FPU/non-void result is
  // saved in _native_lresult and a FPU result in _native_fresult. If
  // this changes then the interpreter_frame_result implementation
  // will need to be updated too.
  //
  // On PPC64, we have stored the result directly after the native call.

  //=============================================================================
  // back in Java

  // We use release_store_fence to update values like the thread state, where
  // we don't want the current thread to continue until all our prior memory
  // accesses (including the new thread state) are visible to other threads.
  __ li(R0/*thread_state*/, _thread_in_Java);
  __ release();
  __ stw(R0/*thread_state*/, thread_(thread_state));
  if (UseMembar) {
    __ fence();
  }

  __ reset_last_Java_frame();

  // Reload R19_method, the call killed it. We can't look at
  // state->_method until we're back in java state because in java
  // state gc can't happen until we get to a safepoint.
  //
  // We've set thread_state to _thread_in_Java already, so restoring
  // R19_method from R14_state works; R19_method is invalid, because
  // GC may have happened.
  __ ld(R19_method, state_(_method)); // reload method, may have moved

  // jvmdi/jvmpi support. Whether we've got an exception pending or
  // not, and whether unlocking throws an exception or not, we notify
  // on native method exit. If we do have an exception, we'll end up
  // in the caller's context to handle it, so if we don't do the
  // notify here, we'll drop it on the floor.
  __ notify_method_exit(true/*native method*/,
                        ilgl /*illegal state (not used for native methods)*/,
                        InterpreterMacroAssembler::NotifyJVMTI,
                        false /*check_exceptions*/);

  //=============================================================================
  // Handle exceptions

  // See if we must unlock.
  //
  {
    Label method_is_not_synced;
    // is_synced is still alive
    assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile");
    __ bfalse(is_synced, method_is_not_synced);

    unlock_method();

    __ bind(method_is_not_synced);
  }

  // Reset active handles after returning from native.
  // thread->active_handles()->clear();
  __ ld(active_handles, thread_(active_handles));
  // JNIHandleBlock::_top is an int.
  // TODO: PPC port: assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
  __ li(R0, 0);
  __ stw(R0, JNIHandleBlock::top_offset_in_bytes(), active_handles);

  Label no_pending_exception_from_native_method;
  __ ld(R0/*pending_exception*/, thread_(pending_exception));
  __ cmpdi(CCR0, R0/*pending_exception*/, 0);
  __ beq(CCR0, no_pending_exception_from_native_method);

  //-----------------------------------------------------------------------------
  // An exception is pending. We call into the runtime only if the
  // caller was not interpreted. If it was interpreted the
  // interpreter will do the correct thing. If it isn't interpreted
  // (call stub/compiled code) we will change our return and continue.

  __ BIND(exception_return);

  Label return_to_initial_caller_with_pending_exception;
  __ cmpdi(CCR0, R15_prev_state, 0);
  __ beq(CCR0, return_to_initial_caller_with_pending_exception);

  // We are returning to an interpreter activation, just pop the state,
  // pop our frame, leave the exception pending, and return.
  __ pop_interpreter_state(/*prev_state_may_be_0=*/false);
  __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2);
  __ mtlr(R21_tmp1);
  __ blr();

  __ BIND(exception_return_sync_check);

  assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile");
  __ bfalse(is_synced, exception_return);

  unlock_method();
  __ b(exception_return);

  __ BIND(return_to_initial_caller_with_pending_exception);

  // We are returning to a c2i-adapter / call-stub, get the address of the
  // exception handler, pop the frame and return to the handler.

  // First, pop to caller's frame.
  __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2);

  __ push_frame_reg_args(0, R11_scratch1);
  // Get the address of the exception handler.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
                  R16_thread,
                  R21_tmp1 /* return pc */);
  __ pop_frame();

  // Load the PC of the exception handler into LR.
  __ mtlr(R3_RET);

  // Load exception into R3_ARG1 and clear pending exception in thread.
  __ ld(R3_ARG1/*exception*/, thread_(pending_exception));
  __ li(R4_ARG2, 0);
  __ std(R4_ARG2, thread_(pending_exception));

  // Load the original return pc into R4_ARG2.
  __ mr(R4_ARG2/*issuing_pc*/, R21_tmp1);

  // Resize frame to get rid of a potential extension.
  __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);

  // Return to exception handler.
  __ blr();

  //-----------------------------------------------------------------------------
  // No exception pending.
  __ BIND(no_pending_exception_from_native_method);

  // Move native method result back into proper registers and return.
  // Invoke result handler (may unbox/promote).
  __ ld(R3_RET, state_(_native_lresult));
  __ lfd(F1_RET, state_(_native_fresult));
  __ call_stub(result_handler_addr);

  // We have created a new BytecodeInterpreter object, now we must destroy it.
  //
  // Restore previous R14_state and caller's SP. R15_prev_state may
  // be 0 here, because our caller may be the call_stub or compiled
  // code.
  __ pop_interpreter_state(/*prev_state_may_be_0=*/true);
  __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2);

  // Resize frame to get rid of a potential extension.
  __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);

  // Must use the return pc which was loaded from the caller's frame
  // as the VM uses return-pc-patching for deoptimization.
  __ mtlr(R21_tmp1);
  __ blr();

  //=============================================================================
  // We encountered an exception while computing the interpreter
  // state, so R14_state isn't valid. Act as if we just returned from
  // the callee method with a pending exception.
  __ BIND(stack_overflow_return);

  //
  // Register state:
  //   R14_state      invalid; trashed by compute_interpreter_state
  //   R15_prev_state valid, but may be 0
  //
  //   R1_SP          valid, points to caller's SP; wasn't yet updated by
  //                  compute_interpreter_state
  //

  // Create exception oop and make it pending.

  // Throw the exception via RuntimeStub "throw_StackOverflowError_entry".
  //
  // Previously, we called C-Code directly. As a consequence, a
  // possible GC tried to process the argument oops of the top frame
  // (see RegisterMap::clear, which sets the corresponding flag to
  // true). This led to crashes because:
  //   1. The top register map did not contain locations for the argument registers
  //   2. The arguments are dead anyway, could be already overwritten in the worst case
  // Solution: Call via special runtime stub that pushes its own
  // frame. This runtime stub has the flag "CodeBlob::caller_must_gc_arguments()"
  // set to "false", which prevents the dead arguments from getting GC'd.
  //
  // 2 cases exist:
  // 1. We were called by the c2i adapter / call stub
  // 2. We were called by the frame manager
  //
  // Both cases are handled by this code:
  // 1. - initial_caller_sp was saved in both cases on entry, so it's safe to
  //      load it back even if it was not changed.
  //    - control flow will be:
  //      throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->
  //      excp_blob of caller method
  // 2. - control flow will be:
  //      throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->
  //      rethrow_excp_entry of frame manager->resume_method
  //      Since we restored the caller SP above, the rethrow_excp_entry can
  //      restore the original interpreter state registers using the stack and
  //      resume the calling method with a pending excp.

  // Pop any c2i extension from the stack, restore LR just to be sure.
  __ ld(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
  __ mtlr(R0);

  // Resize frame to get rid of a potential extension.
  __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);

  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "generated in wrong order");
  // Load target address of the runtime stub.
  __ load_const(R12_scratch2, (StubRoutines::throw_StackOverflowError_entry()));
  __ mtctr(R12_scratch2);
  __ bctr();

  //=============================================================================
  // Counter overflow.

  if (inc_counter) {
    // Handle invocation counter overflow.
    __ bind(invocation_counter_overflow);

    generate_counter_overflow(continue_after_compile);
  }

  native_entry = entry;
  return entry;
}

bool AbstractInterpreter::can_be_compiled(methodHandle m) {
  // No special entry points that preclude compilation.
  return true;
}
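
// Illustrative sketch (not generated code): where the method monitor lives
// relative to the interpreter state, as assumed by lock_method() and
// unlock_method() below. The monitor area grows downwards from
// _monitor_base (== R14_state); the method monitor, when present, is
// always the first one:
//
//   BasicObjectLock* method_monitor(BytecodeInterpreter* state) {
//     return (BasicObjectLock*)
//         ((intptr_t)state - frame::interpreter_frame_monitor_size_in_bytes());
//   }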
// Unlock the current method.
//
void CppInterpreterGenerator::unlock_method(void) {
  // Find preallocated monitor and unlock method. Method monitor is
  // the first one.

  // Registers alive
  //   R14_state
  //
  // Registers updated
  //   volatiles
  //
  const Register monitor = R4_ARG2;

  // Pass address of initial monitor we allocated.
  //
  // First monitor.
  __ addi(monitor, R14_state, -frame::interpreter_frame_monitor_size_in_bytes());

  // Unlock method
  __ unlock_object(monitor);
}

// Lock the current method.
//
void CppInterpreterGenerator::lock_method(void) {
  // Find preallocated monitor and lock method. Method monitor is the
  // first one.

  //
  // Registers alive
  //   R14_state
  //
  // Registers updated
  //   volatiles
  //

  const Register monitor = R4_ARG2;
  const Register object  = R5_ARG3;

  // Pass address of initial monitor we allocated.
  __ addi(monitor, R14_state, -frame::interpreter_frame_monitor_size_in_bytes());

  // Pass object address.
  __ ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);

  // Lock method.
  __ lock_object(monitor, object);
}

// Generate code for handling resuming a deopted method.
void CppInterpreterGenerator::generate_deopt_handling(Register result_index) {

  //=============================================================================
  // Returning from a compiled method into a deopted method. The
  // bytecode at the bcp has completed. The result of the bytecode is
  // in the native abi (the tosca for the template based
  // interpreter). Any stack space that was used by the bytecode that
  // has completed has been removed (e.g. parameters for an invoke) so
  // all that we have to do is place any pending result on the
  // expression stack and resume execution on the next bytecode.

  Label return_from_deopt_common;

  // R3_RET and F1_RET are live here! Load the array index of the
  // required result stub address and continue at return_from_deopt_common.

  // Deopt needs to jump to here to enter the interpreter (return a result).
  deopt_frame_manager_return_atos = __ pc();
  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_OBJECT));
  __ b(return_from_deopt_common);

  deopt_frame_manager_return_btos = __ pc();
  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_BOOLEAN));
  __ b(return_from_deopt_common);

  deopt_frame_manager_return_itos = __ pc();
  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_INT));
  __ b(return_from_deopt_common);

  deopt_frame_manager_return_ltos = __ pc();
  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_LONG));
  __ b(return_from_deopt_common);

  deopt_frame_manager_return_ftos = __ pc();
  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_FLOAT));
  __ b(return_from_deopt_common);

  deopt_frame_manager_return_dtos = __ pc();
  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_DOUBLE));
  __ b(return_from_deopt_common);

  deopt_frame_manager_return_vtos = __ pc();
  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_VOID));
  // Last one, fall-through to return_from_deopt_common.

  // Deopt return common. An index is present that lets us move any
  // possible result being returned to the interpreter's stack.
  //
  __ BIND(return_from_deopt_common);
}

// Generate the code to handle a more_monitors message from the c++ interpreter.
void CppInterpreterGenerator::generate_more_monitors() {

  //
  // Registers alive
  //   R16_thread     - JavaThread*
  //   R15_prev_state - previous BytecodeInterpreter or 0
  //   R14_state      - BytecodeInterpreter* address of receiver's interpreter state
  //   R1_SP          - old stack pointer
  //
  // Registers updated
  //   R1_SP          - new stack pointer
  //

  // Very-local scratch registers.
  const Register old_tos     = R21_tmp1;
  const Register new_tos     = R22_tmp2;
  const Register stack_base  = R23_tmp3;
  const Register stack_limit = R24_tmp4;
  const Register slot        = R25_tmp5;
  const Register n_slots     = R25_tmp5;

  // Interpreter state fields.
  const Register msg         = R24_tmp4;

  // Load up relevant interpreter state.

  __ ld(stack_base, state_(_stack_base));    // Old stack_base
  __ ld(old_tos, state_(_stack));            // Old tos
  __ ld(stack_limit, state_(_stack_limit));  // Old stack_limit

  // extracted monitor_size
  int monitor_size = frame::interpreter_frame_monitor_size_in_bytes();
  assert(Assembler::is_aligned((unsigned int)monitor_size,
                               (unsigned int)frame::alignment_in_bytes),
         "size of a monitor must respect alignment of SP");

  // Save and restore top LR
  __ ld(R12_scratch2, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
  __ resize_frame(-monitor_size, R11_scratch1); // Allocate space for new monitor
  __ std(R12_scratch2, _top_ijava_frame_abi(frame_manager_lr), R1_SP);

  // Initial_caller_sp is used as unextended_sp for non initial callers.
  __ std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);

  __ addi(stack_base, stack_base, -monitor_size);   // New stack_base
  __ addi(new_tos, old_tos, -monitor_size);         // New tos
  __ addi(stack_limit, stack_limit, -monitor_size); // New stack_limit

  __ std(R1_SP, state_(_last_Java_sp));      // Update frame_bottom

  __ std(stack_base, state_(_stack_base));   // Update stack_base
  __ std(new_tos, state_(_stack));           // Update tos
  __ std(stack_limit, state_(_stack_limit)); // Update stack_limit

  __ li(msg, BytecodeInterpreter::got_monitors); // Tell interpreter we allocated the lock
  __ stw(msg, state_(_msg));

  // Shuffle expression stack down. Recall that stack_base points
  // just above the new expression stack bottom. Old_tos and new_tos
  // are used to scan through the old and new expression stacks.

  Label copy_slot, copy_slot_finished;
  __ sub(n_slots, stack_base, new_tos);
  __ srdi_(n_slots, n_slots, LogBytesPerWord); // compute number of slots to copy
  assert(LogBytesPerWord == 3, "conflicts assembler instructions");
  __ beq(CCR0, copy_slot_finished);            // nothing to copy

  __ mtctr(n_slots);

  // loop
  __ bind(copy_slot);
  __ ldu(slot, BytesPerWord, old_tos);         // slot = *++old_tos;
  __ stdu(slot, BytesPerWord, new_tos);        // *++new_tos = slot;
  __ bdnz(copy_slot);

  __ bind(copy_slot_finished);

  // Restart interpreter
  __ li(R0, 0);
  __ std(R0, BasicObjectLock::obj_offset_in_bytes(), stack_base); // Mark lock as unused
}
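
// Illustrative sketch (not generated code): the shuffle loop above in
// C-like pseudocode. Everything between the old tos and the stack base
// moves down in memory by one monitor size, so that the new
// BasicObjectLock can sit just below the existing monitor(s):
//
//   void shuffle_down(intptr_t* old_tos, intptr_t* new_tos, intptr_t* stack_base) {
//     long n = stack_base - new_tos;  // number of slots to copy
//     while (n-- > 0) {
//       *++new_tos = *++old_tos;      // pre-increment copy, like ldu/stdu above
//     }
//   }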
address CppInterpreterGenerator::generate_normal_entry(void) {
  if (interpreter_frame_manager != NULL) return interpreter_frame_manager;

  address entry = __ pc();

  address return_from_native_pc = (address) NULL;

  // Initial entry to frame manager (from call_stub or c2i_adapter).
  //
  // Registers alive
  //   R16_thread     - JavaThread*
  //   R19_method     - callee's Method (method to be invoked)
  //   R17_tos        - address of sender tos (prepushed)
  //   R1_SP          - SP prepared by call stub such that caller's outgoing args are near top
  //   LR             - return address to caller (call_stub or c2i_adapter)
  //   R21_sender_SP  - initial caller sp
  //
  // Registers updated
  //   R15_prev_state - 0
  //
  // Stack layout at this point:
  //
  //   0       [TOP_IJAVA_FRAME_ABI]         <-- R1_SP
  //           alignment (optional)
  //           [outgoing Java arguments]     <-- R17_tos
  //           ...
  //   PARENT  [PARENT_IJAVA_FRAME_ABI]
  //           ...
  //

  // Save initial_caller_sp to caller's abi.
  // The caller frame must be resized before returning to get rid of
  // the c2i part on top of the calling compiled frame (if any).
  // R21_tmp1 must match sender_sp in gen_c2i_adapter.
  // Now override the saved SP with the senderSP so we can pop c2i
  // arguments (if any) off when we return.
  __ std(R21_sender_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);

  // Save LR to caller's frame. We don't use _abi(lr) here,
  // because it is not safe.
  __ mflr(R0);
  __ std(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);

  // If we come here, it is the first invocation of the frame manager.
  // So there is no previous interpreter state.
  __ li(R15_prev_state, 0);

  // Fall through to where "recursive" invocations go.

  //=============================================================================
  // Dispatch an instance of the interpreter. Recursive activations
  // come here.

  Label re_dispatch;
  __ BIND(re_dispatch);

  //
  // Registers alive
  //   R16_thread     - JavaThread*
  //   R19_method     - callee's Method
  //   R17_tos        - address of caller's tos (prepushed)
  //   R15_prev_state - address of caller's BytecodeInterpreter or 0
  //   R1_SP          - caller's SP trimmed such that caller's outgoing args are near top.
  //
  // Stack layout at this point:
  //
  //   0       [TOP_IJAVA_FRAME_ABI]
  //           alignment (optional)
  //           [outgoing Java arguments]
  //           ...
  //   PARENT  [PARENT_IJAVA_FRAME_ABI]
  //           ...

  // Fall through to interpreted execution.

  //=============================================================================
  // Allocate a new Java frame and initialize the new interpreter state.

  Label stack_overflow_return;

  // Create a suitable new Java frame plus a new BytecodeInterpreter instance
  // in the current (frame manager's) C frame.
  generate_compute_interpreter_state(stack_overflow_return);

  // Fall through.

  //=============================================================================
  // Interpreter dispatch.

  Label call_interpreter;
  __ BIND(call_interpreter);

  //
  // Registers alive
  //   R16_thread     - JavaThread*
  //   R15_prev_state - previous BytecodeInterpreter or 0
  //   R14_state      - address of receiver's BytecodeInterpreter
  //   R1_SP          - receiver's stack pointer
  //

  // Thread fields.
  const Register pending_exception = R21_tmp1;

  // Interpreter state fields.
  const Register msg = R24_tmp4;

  // Method fields.
  const Register parameter_count = R25_tmp5;
  const Register result_index    = R26_tmp6;

  const Register dummy = R28_tmp8;

  // Address of various interpreter stubs.
  // R29_tmp9 is reserved.
  const Register stub_addr = R27_tmp7;

  // Uncommon trap needs to jump to here to enter the interpreter
  // (re-execute current bytecode).
  unctrap_frame_manager_entry = __ pc();

  // If we are profiling, store our fp (BSP) in the thread so we can
  // find it during a tick.
  if (Arguments::has_profile()) {
    // On PPC64 we store the pointer to the current BytecodeInterpreter,
    // instead of the bsp of ia64. This should suffice to be able to
    // find all interesting information.
    __ std(R14_state, thread_(last_interpreter_fp));
  }

  // R16_thread, R14_state and R15_prev_state are nonvolatile
  // registers. There is no need to save these. If we needed to save
  // some state in the current Java frame, this could be a place to do
  // so.

  // Call Java bytecode dispatcher passing "BytecodeInterpreter* istate".
  __ call_VM_leaf(CAST_FROM_FN_PTR(address,
                                   JvmtiExport::can_post_interpreter_events()
                                   ? BytecodeInterpreter::runWithChecks
                                   : BytecodeInterpreter::run),
                  R14_state);

  interpreter_return_address = __ last_calls_return_pc();

  // R16_thread, R14_state and R15_prev_state have their values preserved.
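  // Illustrative sketch (hedged): the frame manager is conceptually a loop
  // around the C++ bytecode dispatcher,
  //
  //   for (;;) {
  //     BytecodeInterpreter::run(istate);  // or runWithChecks under JVMTI
  //     switch (istate->_msg) { ... }      // see the branch tree below
  //   }
  //
  // realized in assembly by the message handlers branching back to
  // call_interpreter.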
  // If we are profiling, clear the fp in the thread to tell
  // the profiler that we are no longer in the interpreter.
  if (Arguments::has_profile()) {
    __ li(R11_scratch1, 0);
    __ std(R11_scratch1, thread_(last_interpreter_fp));
  }

  // Load message from bytecode dispatcher.
  // TODO: PPC port: guarantee(4 == BytecodeInterpreter::sz_msg(), "unexpected field size");
  __ lwz(msg, state_(_msg));

  Label more_monitors;
  Label return_from_native;
  Label return_from_native_common;
  Label return_from_native_no_exception;
  Label return_from_interpreted_method;
  Label return_from_recursive_activation;
  Label unwind_recursive_activation;
  Label resume_interpreter;
  Label return_to_initial_caller;
  Label unwind_initial_activation;
  Label unwind_initial_activation_pending_exception;
  Label call_method;
  Label call_special;
  Label retry_method;
  Label retry_method_osr;
  Label popping_frame;
  Label throwing_exception;

  // Branch according to the received message.
  __ cmpwi(CCR1, msg, BytecodeInterpreter::call_method);
  __ cmpwi(CCR2, msg, BytecodeInterpreter::return_from_method);
  __ beq(CCR1, call_method);
  __ beq(CCR2, return_from_interpreted_method);
  __ cmpwi(CCR3, msg, BytecodeInterpreter::more_monitors);
  __ cmpwi(CCR4, msg, BytecodeInterpreter::throwing_exception);
  __ beq(CCR3, more_monitors);
  __ beq(CCR4, throwing_exception);
  __ cmpwi(CCR5, msg, BytecodeInterpreter::popping_frame);
  __ cmpwi(CCR6, msg, BytecodeInterpreter::do_osr);
  __ beq(CCR5, popping_frame);
  __ beq(CCR6, retry_method_osr);

  __ stop("bad message from interpreter");

  //=============================================================================
  // Add a monitor just below the existing one(s). State->_stack_base
  // points to the lowest existing one, so we insert the new one just
  // below it and shuffle the expression stack down. Referring to the
  // above stack layout picture, we must update _stack_base, _stack,
  // _stack_limit and _last_Java_sp in the interpreter state.

  __ BIND(more_monitors);

  generate_more_monitors();
  __ b(call_interpreter);

  generate_deopt_handling(result_index);

  // Restoring the R14_state is already done by the deopt_blob.

  // Current tos includes no parameter slots.
  __ ld(R17_tos, state_(_stack));
  __ li(msg, BytecodeInterpreter::deopt_resume);
  __ b(return_from_native_common);

  // We are sent here when we are unwinding from a native method or
  // adapter with an exception pending. We need to notify the
  // interpreter that there is an exception to process.
  // We arrive here also if the frame manager called an (interpreted)
  // target which returns with a StackOverflow exception.
  // The control flow in this case is:
  //   frame_manager->throw_excp_stub->forward_excp->rethrow_excp_entry

  AbstractInterpreter::_rethrow_exception_entry = __ pc();

  // Restore R14_state.
  __ ld(R14_state, 0, R1_SP);
  __ addi(R14_state, R14_state,
          -frame::interpreter_frame_cinterpreterstate_size_in_bytes());

  // Store exception oop into thread object.
  __ std(R3_RET, thread_(pending_exception));
  __ li(msg, BytecodeInterpreter::method_resume /*rethrow_exception*/);
  //
  // NOTE: the interpreter frame as set up by deopt does NOT include
  // any parameter slots (good thing since we have no callee here
  // and couldn't remove them), so we don't have to do any calculations
  // here to figure it out.
  //
  __ ld(R17_tos, state_(_stack));
  __ b(return_from_native_common);

  //=============================================================================
  // Returning from a native method. Result is in the native abi
  // location so we must move it to the java expression stack.

  __ BIND(return_from_native);
  guarantee(return_from_native_pc == (address) NULL, "precondition");
  return_from_native_pc = __ pc();
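  // Hedged note on the two instructions that follow: R14_state is not
  // reloaded from a save slot but recomputed from the frame linkage,
  //
  //   state = (BytecodeInterpreter*)
  //       (*(intptr_t*)R1_SP   // back chain = parent frame's SP
  //        - frame::interpreter_frame_cinterpreterstate_size_in_bytes());
  //
  // i.e. the interpreter state object sits just below the parent frame.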
  // Restore R14_state.
  __ ld(R14_state, 0, R1_SP);
  __ addi(R14_state, R14_state,
          -frame::interpreter_frame_cinterpreterstate_size_in_bytes());

  //
  // Registers alive
  //   R16_thread
  //   R14_state    - address of caller's BytecodeInterpreter.
  //   R3_RET       - integer result, if any.
  //   F1_RET       - float result, if any.
  //
  // Registers updated
  //   R19_method   - callee's Method
  //   R17_tos      - caller's tos, with outgoing args popped
  //   result_index - index of result handler.
  //   msg          - message for resuming interpreter.
  //

  // Very-local scratch registers.
  const ConditionRegister have_pending_exception = CCR0;

  // Load callee Method, gc may have moved it.
  __ ld(R19_method, state_(_result._to_call._callee));

  // Load address of caller's tos. Includes parameter slots.
  __ ld(R17_tos, state_(_stack));

  // Pop callee's parameters.
  __ ld(parameter_count, in_bytes(Method::const_offset()), R19_method);
  __ lhz(parameter_count, in_bytes(ConstMethod::size_of_parameters_offset()),
         parameter_count);
  __ sldi(parameter_count, parameter_count, Interpreter::logStackElementSize);
  __ add(R17_tos, R17_tos, parameter_count);

  // Result stub address array index.
  // TODO: PPC port: assert(4 == methodOopDesc::sz_result_index(), "unexpected field size");
  __ lwa(result_index, method_(result_index));

  __ li(msg, BytecodeInterpreter::method_resume);

  //
  // Registers alive
  //   R16_thread
  //   R14_state    - address of caller's BytecodeInterpreter.
  //   R17_tos      - address of caller's tos with outgoing args already popped
  //   R3_RET       - integer return value, if any.
  //   F1_RET       - float return value, if any.
  //   result_index - index of result handler.
  //   msg          - message for resuming interpreter.
  //
  // Registers updated
  //   R3_RET       - new address of caller's tos, including result, if any
  //

  __ BIND(return_from_native_common);

  // Check for pending exception.
  __ ld(pending_exception, thread_(pending_exception));
  __ cmpdi(CCR0, pending_exception, 0);
  __ beq(CCR0, return_from_native_no_exception);

  // If there's a pending exception, we really have no result, so
  // R3_RET is dead. Resume_interpreter assumes the new tos is in
  // R3_RET.
  __ mr(R3_RET, R17_tos);

  // `resume_interpreter' expects R15_prev_state to be alive.
  __ ld(R15_prev_state, state_(_prev_link));
  __ b(resume_interpreter);

  __ BIND(return_from_native_no_exception);

  // No pending exception, copy method result from native ABI register
  // to tos.

  // Address of stub descriptor address array.
  __ load_const(stub_addr, CppInterpreter::tosca_result_to_stack());

  // Pass address of tos to stub.
  __ mr(R4_ARG2, R17_tos);

  // Address of stub descriptor address.
  __ sldi(result_index, result_index, LogBytesPerWord);
  __ add(stub_addr, stub_addr, result_index);

  // Stub descriptor address.
  __ ld(stub_addr, 0, stub_addr);

  // TODO: don't do this via a call, do it in place!
  //
  // Call stub via descriptor.
  // In R3_ARG1/F1_ARG1: result value (R3_RET or F1_RET).
  __ call_stub(stub_addr);

  // New tos = result of call in R3_RET.

  // `resume_interpreter' expects R15_prev_state to be alive.
  __ ld(R15_prev_state, state_(_prev_link));
  __ b(resume_interpreter);

  //=============================================================================
  // We encountered an exception while computing the interpreter
  // state, so R14_state isn't valid. Act as if we just returned from
  // the callee method with a pending exception.
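  // Illustrative sketch (hedged) of the converter-stub dispatch above:
  //
  //   address* table = CppInterpreter::tosca_result_to_stack(); // per-BasicType stubs
  //   address  stub  = table[result_index];                     // the sldi/add/ld
  //   new_tos        = stub(result_value, tos);                 // call_stub, R3_RET
  //
  // result_index was derived via AbstractInterpreter::BasicType_as_index().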
  __ BIND(stack_overflow_return);

  //
  // Registers alive
  //   R16_thread     - JavaThread*
  //   R1_SP          - old stack pointer
  //   R19_method     - callee's Method
  //   R17_tos        - address of caller's tos (prepushed)
  //   R15_prev_state - address of caller's BytecodeInterpreter or 0
  //   R18_locals     - address of callee's locals array
  //
  // Registers updated
  //   R3_RET         - address of resuming tos, if recursive unwind

  Label Lskip_unextend_SP;

  {
    const ConditionRegister is_initial_call = CCR0;
    const Register tos_save = R21_tmp1;
    const Register tmp      = R22_tmp2;

    assert(tos_save->is_nonvolatile(), "need a nonvolatile");

    // Is the exception thrown in the initial Java frame of this frame
    // manager frame?
    __ cmpdi(is_initial_call, R15_prev_state, 0);
    __ bne(is_initial_call, Lskip_unextend_SP);

    // Pop any c2i extension from the stack. This is necessary in the
    // non-recursive case (that is, we were called by the c2i adapter,
    // meaning we have no prev state). In this case we entered the frame
    // manager through a special entry which pushes the original
    // unextended SP to the stack. Here we load it back.
    __ ld(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
    __ mtlr(R0);
    // Resize frame to get rid of a potential extension.
    __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);

    // Fall through.
    __ bind(Lskip_unextend_SP);

    // Throw the exception via RuntimeStub "throw_StackOverflowError_entry".
    //
    // Previously, we called C code directly. As a consequence, a
    // possible GC tried to process the argument oops of the top frame
    // (see RegisterMap::clear, which sets the corresponding flag to
    // true). This led to crashes because:
    //   1. The top register map did not contain locations for the argument registers.
    //   2. The arguments are dead anyway; they could already have been overwritten
    //      in the worst case.
    // Solution: Call via a special runtime stub that pushes its own frame.
    // This runtime stub has the flag "CodeBlob::caller_must_gc_arguments()"
    // set to "false", which prevents the dead arguments from getting GC'd.
    //
    // Two cases exist:
    //   1. We were called by the c2i adapter / call stub.
    //   2. We were called by the frame manager.
    //
    // Both cases are handled by this code:
    //   1. - initial_caller_sp was saved on the stack => load it back and we're ok.
    //      - control flow will be:
    //        throw_stackoverflow_stub->VM->throw_stackoverflow_stub->
    //        forward_excep->excp_blob of calling method
    //   2. - control flow will be:
    //        throw_stackoverflow_stub->VM->throw_stackoverflow_stub->
    //        forward_excep->rethrow_excp_entry of frame manager->resume_method
    //      Since we restored the caller SP above, the rethrow_excp_entry can
    //      restore the original interpreter state registers using the stack
    //      and resume the calling method with a pending exception.

    assert(StubRoutines::throw_StackOverflowError_entry() != NULL,
           "generated in wrong order");
    __ load_const(R3_ARG1, (StubRoutines::throw_StackOverflowError_entry()));
    __ mtctr(R3_ARG1);
    __ bctr();
  }
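  // Hedged note: the load_const/mtctr/bctr sequence above is the PPC64
  // idiom for a tail jump to an address that does not fit into a branch
  // displacement,
  //
  //   CTR = StubRoutines::throw_StackOverflowError_entry();
  //   goto *CTR;   // bctr, no return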
  //=============================================================================
  // We have popped a frame from an interpreted call. We are assured
  // of returning to an interpreted call by the popframe abi. We have
  // no return value; all we have to do is pop the current frame and
  // then make sure that the top of stack (of the caller) gets set to
  // where it was when we entered the callee (i.e. the args are still
  // in place). Or we are returning to the interpreter. In the first
  // case we must extract the result (if any) from the java expression
  // stack and store it in the location the native abi would expect
  // for a call returning this type. In the second case we must simply
  // do a stack to stack move as we unwind.

  __ BIND(popping_frame);

  // Registers alive
  //   R14_state
  //   R15_prev_state
  //   R17_tos
  //
  // Registers updated
  //   R19_method
  //   R3_RET
  //   msg
  {
    Label L;

    // Reload callee method, gc may have moved it.
    __ ld(R19_method, state_(_method));

    // We may be returning to a deoptimized frame in which case the
    // usual assumption of a recursive return is not true.

    // Not equal = is recursive call.
    __ cmpdi(CCR0, R15_prev_state, 0);

    __ bne(CCR0, L);

    // Pop_frame capability.
    // The pop_frame api says that the underlying frame is a Java frame;
    // in this case (prev_state==null) it must be a compiled frame:
    //
    // Stack at this point: I, C2I + C, ...
    //
    // The outgoing arguments of the call have just been copied
    // (popframe_preserve_args). By the pop_frame api, we must end up
    // in an interpreted frame, so the compiled frame will be
    // deoptimized. Deoptimization will restore the outgoing arguments
    // from popframe_preserve_args, adjust the tos such that it includes
    // the popframe_preserve_args, and adjust the bci such that the call
    // will be executed again.
    // We have no results, just pop the interpreter frame, resize the
    // compiled frame to get rid of the c2i extension and return to the
    // deopt_handler.
    __ b(unwind_initial_activation);

    // Is recursive call.
    __ bind(L);

    // Resume_interpreter expects the original tos in R3_RET.
    __ ld(R3_RET, prev_state_(_stack));

    // We're done.
    __ li(msg, BytecodeInterpreter::popping_frame);

    __ b(unwind_recursive_activation);
  }

  //=============================================================================
  // We have finished an interpreted call. We are either returning to
  // native (call_stub/c2) or we are returning to the interpreter.
  // When returning to native, we must extract the result (if any)
  // from the java expression stack and store it in the location the
  // native abi expects. When returning to the interpreter we must
  // simply do a stack to stack move as we unwind.

  __ BIND(return_from_interpreted_method);

  //
  // Registers alive
  //   R16_thread     - JavaThread*
  //   R15_prev_state - address of caller's BytecodeInterpreter or 0
  //   R14_state      - address of callee's interpreter state
  //   R1_SP          - callee's stack pointer
  //
  // Registers updated
  //   R19_method     - callee's method
  //   R3_RET         - address of result (new caller's tos),
  //                    if returning to interpreted
  //   msg            - message for interpreter,
  //                    if returning to interpreted
  //

  // Check if this is the initial invocation of the frame manager.
  // If so, R15_prev_state will be null.
  __ cmpdi(CCR0, R15_prev_state, 0);

  // Reload callee method, gc may have moved it.
  __ ld(R19_method, state_(_method));

  // Load the method's result type.
  __ lwz(result_index, method_(result_index));

  // Go to return_to_initial_caller if R15_prev_state is null.
  __ beq(CCR0, return_to_initial_caller);
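  // Illustrative C sketch (hedged) of the double-slot converter below
  // (T_LONG/T_DOUBLE); the expression stack grows towards smaller addresses:
  //
  //   tos[0]  = from[1];   // live half
  //   tos[-1] = from[0];   // dummy half, copied only for debugging
  //   new_tos = tos - 2;   // two slots consumed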
  // Copy callee's result to caller's expression stack via inline
  // stack-to-stack converters.
  {
    Register new_tos   = R3_RET;
    Register from_temp = R4_ARG2;
    Register from      = R5_ARG3;
    Register tos       = R6_ARG4;
    Register tmp1      = R7_ARG5;
    Register tmp2      = R8_ARG6;

    ConditionRegister result_type_is_void   = CCR1;
    ConditionRegister result_type_is_long   = CCR2;
    ConditionRegister result_type_is_double = CCR3;

    Label stack_to_stack_void;
    Label stack_to_stack_double_slot; // T_LONG, T_DOUBLE
    Label stack_to_stack_single_slot; // T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT, T_FLOAT, T_OBJECT
    Label stack_to_stack_done;

    // Pass callee's address of tos + BytesPerWord.
    __ ld(from_temp, state_(_stack));

    // Result type: void.
    __ cmpwi(result_type_is_void, result_index,
             AbstractInterpreter::BasicType_as_index(T_VOID));

    // Pass caller's tos == callee's locals address.
    __ ld(tos, state_(_locals));

    // Result type: long.
    __ cmpwi(result_type_is_long, result_index,
             AbstractInterpreter::BasicType_as_index(T_LONG));

    __ addi(from, from_temp, Interpreter::stackElementSize);

    // !! don't branch above this line !!

    // Handle void.
    __ beq(result_type_is_void, stack_to_stack_void);

    // Result type: double.
    __ cmpwi(result_type_is_double, result_index,
             AbstractInterpreter::BasicType_as_index(T_DOUBLE));

    // Handle long or double.
    __ beq(result_type_is_long, stack_to_stack_double_slot);
    __ beq(result_type_is_double, stack_to_stack_double_slot);

    // Fall through to single slot types (incl. object).

    {
      __ BIND(stack_to_stack_single_slot);
      // T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT, T_FLOAT, T_OBJECT

      __ ld(tmp1, 0, from);
      __ std(tmp1, 0, tos);
      // New expression stack top.
      __ addi(new_tos, tos, - BytesPerWord);

      __ b(stack_to_stack_done);
    }

    {
      __ BIND(stack_to_stack_double_slot);
      // T_LONG, T_DOUBLE

      // Move both entries for debug purposes even though only one is live.
      __ ld(tmp1, BytesPerWord, from);
      __ ld(tmp2, 0, from);
      __ std(tmp1, 0, tos);
      __ std(tmp2, -BytesPerWord, tos);

      // New expression stack top.
      __ addi(new_tos, tos, - 2 * BytesPerWord); // Two slots.

      __ b(stack_to_stack_done);
    }

    {
      __ BIND(stack_to_stack_void);
      // T_VOID

      // New expression stack top.
      __ mr(new_tos, tos);

      // Fall through to stack_to_stack_done.
    }

    __ BIND(stack_to_stack_done);
  }

  // New tos = R3_RET.

  // Get the message for the interpreter.
  __ li(msg, BytecodeInterpreter::method_resume);

  // And fall thru.

  //=============================================================================
  // Restore caller's interpreter state and pass pointer to caller's
  // new tos to caller.

  __ BIND(unwind_recursive_activation);

  //
  // Registers alive
  //   R15_prev_state - address of caller's BytecodeInterpreter
  //   R3_RET         - address of caller's tos
  //   msg            - message for caller's BytecodeInterpreter
  //   R1_SP          - callee's stack pointer
  //
  // Registers updated
  //   R14_state      - address of caller's BytecodeInterpreter
  //   R15_prev_state - address of its parent or 0
  //

  // Pop callee's interpreter and set R14_state to caller's interpreter.
  __ pop_interpreter_state(/*prev_state_may_be_0=*/false);

  // And fall thru.

  //=============================================================================
  // Resume the (calling) interpreter after a call.

  __ BIND(resume_interpreter);

  //
  // Registers alive
  //   R14_state      - address of resuming BytecodeInterpreter
  //   R15_prev_state - address of its parent or 0
  //   R3_RET         - address of resuming tos
  //   msg            - message for resuming interpreter
  //   R1_SP          - callee's stack pointer
  //
  // Registers updated
  //   R1_SP          - caller's stack pointer
  //
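  // Illustrative sketch (hedged) of resume_interpreter in C-like terms:
  //
  //   /* restore caller's C frame */   // pop_interpreter_frame_to_state below
  //   state->_stack = new_tos;         // result already on the stack
  //   state->_msg   = msg;             // e.g. method_resume
  //   goto call_interpreter;           // re-enter the dispatcher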
  // Restore C stack pointer of caller (resuming interpreter);
  // R14_state already points to the resuming BytecodeInterpreter.
  __ pop_interpreter_frame_to_state(R14_state, R21_tmp1, R11_scratch1, R12_scratch2);

  // Store new address of tos (holding return value) in interpreter state.
  __ std(R3_RET, state_(_stack));

  // Store message for interpreter.
  __ stw(msg, state_(_msg));

  __ b(call_interpreter);

  //=============================================================================
  // Interpreter returning to native code (call_stub/c1/c2) from
  // initial activation. Convert stack result and unwind activation.

  __ BIND(return_to_initial_caller);

  //
  // Registers alive
  //   R19_method    - callee's Method
  //   R14_state     - address of callee's interpreter state
  //   R16_thread    - JavaThread
  //   R1_SP         - callee's stack pointer
  //
  // Registers updated
  //   R3_RET/F1_RET - result in expected output register
  //

  // If we have an exception pending we have no result and we
  // must figure out where to really return to.
  //
  __ ld(pending_exception, thread_(pending_exception));
  __ cmpdi(CCR0, pending_exception, 0);
  __ bne(CCR0, unwind_initial_activation_pending_exception);

  __ lwa(result_index, method_(result_index));

  // Address of stub descriptor address array.
  __ load_const(stub_addr, CppInterpreter::stack_result_to_native());

  // Pass address of callee's tos + BytesPerWord.
  // Will then point directly to result.
  __ ld(R3_ARG1, state_(_stack));
  __ addi(R3_ARG1, R3_ARG1, Interpreter::stackElementSize);

  // Address of stub descriptor address.
  __ sldi(result_index, result_index, LogBytesPerWord);
  __ add(stub_addr, stub_addr, result_index);

  // Stub descriptor address.
  __ ld(stub_addr, 0, stub_addr);

  // TODO: don't do this via a call, do it in place!
  //
  // Call stub via descriptor.
  __ call_stub(stub_addr);

  __ BIND(unwind_initial_activation);

  // Unwind from initial activation. No exception is pending.

  //
  // Stack layout at this point:
  //
  //    0       [TOP_IJAVA_FRAME_ABI]         <-- R1_SP
  //            ...
  //    CALLER  [PARENT_IJAVA_FRAME_ABI]
  //            ...
  //    CALLER  [unextended ABI]
  //            ...
  //
  // The CALLER frame has a C2I adapter or is an entry-frame.
  //

  // An interpreter frame exists; we may pop the TOP_IJAVA_FRAME and
  // turn the caller's PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME.
  // But, we simply restore the return pc from the caller's frame and
  // use the caller's initial_caller_sp as the new SP, which pops the
  // interpreter frame and "resizes" the caller's frame to its
  // "unextended" size.

  // Get rid of top frame.
  __ pop_frame();

  // Load return PC from parent frame.
  __ ld(R21_tmp1, _parent_ijava_frame_abi(lr), R1_SP);

  // Resize frame to get rid of a potential extension.
  __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);

  // Update LR.
  __ mtlr(R21_tmp1);

  // Return.
  __ blr();

  //=============================================================================
  // Unwind from initial activation. An exception is pending.

  __ BIND(unwind_initial_activation_pending_exception);

  //
  // Stack layout at this point:
  //
  //   0       [TOP_IJAVA_FRAME_ABI]          <-- R1_SP
  //           ...
  //   CALLER  [PARENT_IJAVA_FRAME_ABI]
  //           ...
  //   CALLER  [unextended ABI]
  //           ...
  //
  // The CALLER frame has a C2I adapter or is an entry-frame.
  //

  // An interpreter frame exists; we may pop the TOP_IJAVA_FRAME and
  // turn the caller's PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME.
  // But, we just pop the current TOP_IJAVA_FRAME and fall through.

  __ pop_frame();
  __ ld(R3_ARG1, _top_ijava_frame_abi(lr), R1_SP);

  //
  // Stack layout at this point:
  //
  //   CALLER  [PARENT_IJAVA_FRAME_ABI]       <-- R1_SP
  //           ...
  //   CALLER  [unextended ABI]
  //           ...
  //
  // The CALLER frame has a C2I adapter or is an entry-frame.
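  // Illustrative sketch (hedged) of the handler lookup that follows:
  //
  //   handler = SharedRuntime::exception_handler_for_return_address(thread, ret_pc);
  //   R3_ARG1 = thread->pending_exception();  // pass exception oop
  //   thread->clear_pending_exception();
  //   R4_ARG2 = ret_pc;                       // issuing pc
  //   LR      = handler;
  //   blr();                                  // branch to the handler stub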
  //
  // Registers alive
  //   R16_thread
  //   R3_ARG1 - return address to caller
  //
  // Registers updated
  //   R3_ARG1 - address of pending exception
  //   R4_ARG2 - issuing pc = return address to caller
  //   LR      - address of exception handler stub
  //

  // Resize frame to get rid of a potential extension.
  __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);

  __ mr(R14, R3_ARG1);      // R14 := ARG1
  __ mr(R4_ARG2, R3_ARG1);  // ARG2 := ARG1

  // Find the address of the "catch_exception" stub.
  __ push_frame_reg_args(0, R11_scratch1);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address,
                                   SharedRuntime::exception_handler_for_return_address),
                  R16_thread,
                  R4_ARG2);
  __ pop_frame();

  // Load continuation address into LR.
  __ mtlr(R3_RET);

  // Load address of pending exception and clear it in thread object.
  __ ld(R3_ARG1/*R3_RET*/, thread_(pending_exception));
  __ li(R4_ARG2, 0);
  __ std(R4_ARG2, thread_(pending_exception));

  // Re-load issuing pc.
  __ mr(R4_ARG2, R14);

  // Branch to found exception handler.
  __ blr();

  //=============================================================================
  // Call a new method. Compute new args and trim the expression stack
  // to only what we are currently using and then recurse.

  __ BIND(call_method);

  //
  // Registers alive
  //   R16_thread
  //   R14_state      - address of caller's BytecodeInterpreter
  //   R1_SP          - caller's stack pointer
  //
  // Registers updated
  //   R15_prev_state - address of caller's BytecodeInterpreter
  //   R17_tos        - address of caller's tos
  //   R19_method     - callee's Method
  //   R1_SP          - trimmed back
  //

  // Very-local scratch registers.
  const Register offset     = R21_tmp1;
  const Register tmp        = R22_tmp2;
  const Register self_entry = R23_tmp3;
  const Register stub_entry = R24_tmp4;

  const ConditionRegister cr = CCR0;

  // Load the address of the frame manager.
  __ load_const(self_entry, &interpreter_frame_manager);
  __ ld(self_entry, 0, self_entry);

  // Load BytecodeInterpreter._result._to_call._callee (callee's Method).
  __ ld(R19_method, state_(_result._to_call._callee));

  // Load BytecodeInterpreter._stack (outgoing tos).
  __ ld(R17_tos, state_(_stack));

  // Save address of caller's BytecodeInterpreter.
  __ mr(R15_prev_state, R14_state);

  // Load the callee's entry point.
  // Load BytecodeInterpreter._result._to_call._callee_entry_point.
  __ ld(stub_entry, state_(_result._to_call._callee_entry_point));

  // Check whether stub_entry is equal to self_entry.
  __ cmpd(cr, self_entry, stub_entry);

  // if (self_entry == stub_entry)
  //   do a re-dispatch;
  __ beq(cr, re_dispatch);

  // else
  //   call the specialized entry (adapter for jni or compiled code).
  __ BIND(call_special);

  //
  // Call the entry generated by `InterpreterGenerator::generate_native_entry'.
  //
  // Registers alive
  //   R16_thread
  //   R15_prev_state - address of caller's BytecodeInterpreter
  //   R19_method     - callee's Method
  //   R17_tos        - address of caller's tos
  //   R1_SP          - caller's stack pointer
  //

  // Mark return from specialized entry for generate_native_entry.
  guarantee(return_from_native_pc != (address) NULL, "precondition");
  frame_manager_specialized_return = return_from_native_pc;

  // Set sender_SP in case we call interpreter native wrapper which
  // will expect it. Compiled code should not care.
  __ mr(R21_sender_SP, R1_SP);
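  // Hedged note: what follows is a tail call. Roughly,
  //
  //   LR = return_from_native_pc;  // the frame manager's resume point
  //   goto *stub_entry;            // native wrapper / adapter entry
  //
  // so the callee "returns" directly into the frame manager's
  // return_from_native path above.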
  // Do a tail call here, and let the link register point to
  // frame_manager_specialized_return, which is return_from_native_pc.
  __ load_const(tmp, frame_manager_specialized_return);
  __ call_stub_and_return_to(stub_entry, tmp /* return_pc=tmp */);

  //=============================================================================
  //
  // InterpretMethod triggered OSR compilation of some Java method M
  // and now asks to run the compiled code. We call this code the
  // `callee'.
  //
  // This is our current idea of how OSR should look on PPC64:
  //
  // While interpreting a Java method M the stack is:
  //
  //   (InterpretMethod (M), IJAVA_FRAME (M), ANY_FRAME, ...).
  //
  // After having OSR compiled M, `InterpretMethod' returns to the
  // frame manager, sending the message `retry_method_osr'. The stack
  // is:
  //
  //   (IJAVA_FRAME (M), ANY_FRAME, ...).
  //
  // The compiler will have generated an `nmethod' suitable for
  // continuing execution of M at the bytecode index at which OSR took
  // place. So now the frame manager calls the OSR entry. The OSR
  // entry sets up a JIT_FRAME for M and continues execution of M with
  // initial state determined by the IJAVA_FRAME.
  //
  //   (JIT_FRAME (M), IJAVA_FRAME (M), ANY_FRAME, ...).
  //

  __ BIND(retry_method_osr);
  {
    //
    // Registers alive
    //   R16_thread
    //   R15_prev_state - address of caller's BytecodeInterpreter
    //   R14_state      - address of callee's BytecodeInterpreter
    //   R1_SP          - callee's SP before call to InterpretMethod
    //
    // Registers updated
    //   R17            - pointer to callee's locals array
    //                    (declared via `interpreter_arg_ptr_reg' in the AD file)
    //   R19_method     - callee's Method
    //   R1_SP          - callee's SP (will become SP of OSR adapter frame)
    //

    // Provide a debugger breakpoint in the frame manager if breakpoints
    // in osr'd methods are requested.
#ifdef COMPILER2
    NOT_PRODUCT( if (OptoBreakpointOSR) { __ illtrap(); } )
#endif

    // Load callee's pointer to locals array from callee's state.
    __ ld(R17, state_(_locals));

    // Load osr entry.
    __ ld(R12_scratch2, state_(_result._osr._osr_entry));

    // Load address of temporary osr buffer to arg1.
    __ ld(R3_ARG1, state_(_result._osr._osr_buf));
    __ mtctr(R12_scratch2);

    // Load method, gc may move it during execution of the osr'd method.
    __ ld(R22_tmp2, state_(_method));

    // Load message 'call_method'.
    __ li(R23_tmp3, BytecodeInterpreter::call_method);

    {
      // Pop the IJAVA frame of the method which we are going to call osr'd.
      Label no_state, skip_no_state;
      __ pop_interpreter_state(/*prev_state_may_be_0=*/true);
      __ cmpdi(CCR0, R14_state, 0);
      __ beq(CCR0, no_state);

      // Return to interpreter.
      __ pop_interpreter_frame_to_state(R14_state, R11_scratch1, R12_scratch2, R21_tmp1);

      // Init _result._to_call._callee and tell gc that it contains a valid oop
      // by setting _msg to 'call_method'.
      __ std(R22_tmp2, state_(_result._to_call._callee));
      // TODO: PPC port: assert(4 == BytecodeInterpreter::sz_msg(), "unexpected field size");
      __ stw(R23_tmp3, state_(_msg));

      __ load_const(R21_tmp1, frame_manager_specialized_return);
      __ b(skip_no_state);
      __ bind(no_state);

      // Return to initial caller.

      // Get rid of top frame.
      __ pop_frame();

      // Load return PC from parent frame.
      __ ld(R21_tmp1, _parent_ijava_frame_abi(lr), R1_SP);

      // Resize frame to get rid of a potential extension.
      __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);

      __ bind(skip_no_state);

      // Update LR with return pc.
      __ mtlr(R21_tmp1);
    }

    // Jump to the osr entry point.
    __ bctr();
  }
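  // Illustrative sketch (hedged) of the OSR hand-off above:
  //
  //   osr_entry = istate->_result._osr._osr_entry; // nmethod's OSR entry
  //   osr_buf   = istate->_result._osr._osr_buf;   // interpreter state snapshot
  //   /* pop M's interpreter frame */              // pop_interpreter_state
  //   osr_entry(osr_buf);                          // continue M as compiled code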
  //=============================================================================
  // Interpreted method "returned" with an exception, pass it on.
  // Pass no result, unwind activation and continue/return to
  // interpreter/call_stub/c2.

  __ BIND(throwing_exception);

  // Check if this is the initial invocation of the frame manager. If
  // so, previous interpreter state in R15_prev_state will be null.

  // New tos of caller is callee's first parameter address, that is
  // callee's incoming arguments are popped.
  __ ld(R3_RET, state_(_locals));

  // Check whether this is an initial call.
  __ cmpdi(CCR0, R15_prev_state, 0);
  // Yes, called from the call stub or from generated code via a c2i frame.
  __ beq(CCR0, unwind_initial_activation_pending_exception);

  // Send resume message, interpreter will see the exception first.
  __ li(msg, BytecodeInterpreter::method_resume);
  __ b(unwind_recursive_activation);

  //=============================================================================
  // Push the last instruction out to the code buffer.

  {
    __ unimplemented("end of InterpreterGenerator::generate_normal_entry", 128);
  }

  interpreter_frame_manager = entry;
  return interpreter_frame_manager;
}

// Generate code for various sorts of method entries.
//
address AbstractInterpreterGenerator::generate_method_entry(AbstractInterpreter::MethodKind kind) {
  address entry_point = NULL;

  switch (kind) {
  case Interpreter::zerolocals                 :                                                                               break;
  case Interpreter::zerolocals_synchronized    :                                                                               break;
  case Interpreter::native                     : // Fall thru.
  case Interpreter::native_synchronized        : entry_point = ((CppInterpreterGenerator*)this)->generate_native_entry();      break;
  case Interpreter::empty                      :                                                                               break;
  case Interpreter::accessor                   : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();       break;
  case Interpreter::abstract                   : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();       break;
  // These are special interpreter intrinsics which we don't support so far.
  case Interpreter::java_lang_math_sin         :                                                                               break;
  case Interpreter::java_lang_math_cos         :                                                                               break;
  case Interpreter::java_lang_math_tan         :                                                                               break;
  case Interpreter::java_lang_math_abs         :                                                                               break;
  case Interpreter::java_lang_math_log         :                                                                               break;
  case Interpreter::java_lang_math_log10       :                                                                               break;
  case Interpreter::java_lang_math_sqrt        :                                                                               break;
  case Interpreter::java_lang_math_pow         :                                                                               break;
  case Interpreter::java_lang_math_exp         :                                                                               break;
  case Interpreter::java_lang_ref_reference_get: entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry();  break;
  default                                      : ShouldNotReachHere();                                                         break;
  }

  if (entry_point) {
    return entry_point;
  }
  return ((InterpreterGenerator*)this)->generate_normal_entry();
}

InterpreterGenerator::InterpreterGenerator(StubQueue* code)
 : CppInterpreterGenerator(code) {
  generate_all(); // Down here so it can be "virtual".
}
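// Hedged note on generate_method_entry above: every kind that leaves
// entry_point NULL (zerolocals, empty, the math intrinsics, ...) deliberately
// falls back to generate_normal_entry(), i.e. the frame manager executes such
// methods like any other bytecode method.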
// How much stack a topmost interpreter method activation needs in words.
int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
  // Computation is in bytes not words to match layout_activation_impl
  // below, but the return is in words.

  //
  //  0       [TOP_IJAVA_FRAME_ABI]                                \
  //          alignment (optional)                              \   |
  //          [operand stack / Java parameters] > stack         |   |
  //          [monitors] (optional)             > monitors      |   |
  //          [PARENT_IJAVA_FRAME_ABI]                       \  |   |
  //          [BytecodeInterpreter object] > interpreter \   |  |   |
  //          alignment (optional)                | round | parent | round | top
  //          [Java result] (2 slots)   > result  |       |        |       |
  //          [Java non-arg locals]    \ locals   |       |        |       |
  //          [arg locals]             /          /       /        /       /
  //

  int locals      = method->max_locals() * BytesPerWord;
  int interpreter = frame::interpreter_frame_cinterpreterstate_size_in_bytes();
  int result      = 2 * BytesPerWord;

  int parent = round_to(interpreter + result + locals, 16)
               + frame::parent_ijava_frame_abi_size;

  int stack    = method->max_stack() * BytesPerWord;
  int monitors = method->is_synchronized()
                 ? frame::interpreter_frame_monitor_size_in_bytes()
                 : 0;
  int top = round_to(parent + monitors + stack, 16)
            + frame::top_ijava_frame_abi_size;

  return (top / BytesPerWord);
}

void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill,
                                                  frame* caller,
                                                  frame* current,
                                                  Method* method,
                                                  intptr_t* locals,
                                                  intptr_t* stack,
                                                  intptr_t* stack_base,
                                                  intptr_t* monitor_base,
                                                  intptr_t* frame_sp,
                                                  bool is_top_frame) {
  // What about any vtable?
  //
  to_fill->_thread = JavaThread::current();
  // This gets filled in later but make it something recognizable for now.
  to_fill->_bcp = method->code_base();
  to_fill->_locals = locals;
  to_fill->_constants = method->constants()->cache();
  to_fill->_method = method;
  to_fill->_mdx = NULL;
  to_fill->_stack = stack;

  if (is_top_frame && JavaThread::current()->popframe_forcing_deopt_reexecution()) {
    to_fill->_msg = deopt_resume2;
  } else {
    to_fill->_msg = method_resume;
  }
  to_fill->_result._to_call._bcp_advance = 0;
  to_fill->_result._to_call._callee_entry_point = NULL; // Doesn't matter to anyone.
  to_fill->_result._to_call._callee = NULL;             // Doesn't matter to anyone.
  to_fill->_prev_link = NULL;

  if (caller->is_interpreted_frame()) {
    interpreterState prev = caller->get_interpreterState();

    // Support MH calls. Make sure the interpreter will return the right address:
    // 1. Caller did an ordinary interpreted->compiled call: Set a prev_state
    //    which makes the CPP interpreter return to the frame manager's
    //    "return_from_interpreted_method" entry after finishing execution.
    // 2. Caller did a MH call: If the caller has a MethodHandleInvoke in its
    //    state (invariant: must be the caller of the bottom vframe) we used the
    //    "call_special" entry to do the call, meaning the arguments have not
    //    been popped from the stack. Therefore, don't enter a prev state in
    //    this case in order to return to the "return_from_native" frame manager
    //    entry, which takes care of popping arguments. Also, don't overwrite
    //    the MH.invoke Method in the prev_state in order to be able to figure
    //    out the number of arguments to pop.
    // The parameter method can represent MethodHandle.invokeExact(...).
    // The MethodHandleCompiler generates these synthetic Methods,
    // including bytecodes, if an invokedynamic call gets inlined. In
    // this case we want to return like from any other interpreted
    // Java call, so we set _prev_link.
    to_fill->_prev_link = prev;
    if (*prev->_bcp == Bytecodes::_invokeinterface || *prev->_bcp == Bytecodes::_invokedynamic) {
      prev->_result._to_call._bcp_advance = 5;
    } else {
      prev->_result._to_call._bcp_advance = 3;
    }
  }
  to_fill->_oop_temp = NULL;
  to_fill->_stack_base = stack_base;
  // Need +1 here because stack_base points to the word just above the
  // first expr stack entry and stack_limit is supposed to point to
  // the word just below the last expr stack entry. See
  // generate_compute_interpreter_state.
  to_fill->_stack_limit = stack_base - (method->max_stack() + 1);
  to_fill->_monitor_base = (BasicObjectLock*) monitor_base;

  to_fill->_frame_bottom = frame_sp;

  // PPC64 specific
  to_fill->_last_Java_pc = NULL;
  to_fill->_last_Java_fp = NULL;
  to_fill->_last_Java_sp = frame_sp;
#ifdef ASSERT
  to_fill->_self_link = to_fill;
  to_fill->_native_fresult = 123456.789;
  to_fill->_native_lresult = CONST64(0xdeafcafedeadc0de);
#endif
}

void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate,
                                                     address last_Java_pc,
                                                     intptr_t* last_Java_fp) {
  istate->_last_Java_pc = last_Java_pc;
  istate->_last_Java_fp = last_Java_fp;
}

int AbstractInterpreter::layout_activation(Method* method,
                                           int temps,         // Number of slots on java expression stack in use.
                                           int popframe_args,
                                           int monitors,      // Number of active monitors.
                                           int caller_actual_parameters,
                                           int callee_params, // Number of slots for callee parameters.
                                           int callee_locals, // Number of slots for locals.
                                           frame* caller,
                                           frame* interpreter_frame,
                                           bool is_top_frame,
                                           bool is_bottom_frame) {
  // NOTE this code must exactly mimic what
  // InterpreterGenerator::generate_compute_interpreter_state() does
  // as far as allocating an interpreter frame. However there is one
  // exception: with the C++ based interpreter only the topmost frame
  // has a full-sized expression stack. The 16 byte slop factor is
  // both the abi scratch area and a place to hold a result from a
  // callee on its way to the caller's stack.

  int monitor_size = frame::interpreter_frame_monitor_size_in_bytes() * monitors;
  int frame_size;
  int top_frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes()
                                + monitor_size
                                + (method->max_stack() * Interpreter::stackElementWords * BytesPerWord)
                                + 2 * BytesPerWord,
                                frame::alignment_in_bytes)
                       + frame::top_ijava_frame_abi_size;
  if (is_top_frame) {
    frame_size = top_frame_size;
  } else {
    frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes()
                          + monitor_size
                          + ((temps - callee_params + callee_locals)
                             * Interpreter::stackElementWords * BytesPerWord)
                          + 2 * BytesPerWord,
                          frame::alignment_in_bytes)
                 + frame::parent_ijava_frame_abi_size;
    assert(popframe_args == 0, "non-zero for top_frame only");
  }
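  // Illustrative numeric sketch (hedged, assuming a 64-bit VM where
  // BytesPerWord == 8 and Interpreter::stackElementWords == 1): a top frame
  // for a method with max_stack() == 10 and no monitors needs
  //
  //   round_to(cinterpreterstate_size + 10*8 + 2*8, frame::alignment_in_bytes)
  //     + frame::top_ijava_frame_abi_size
  //
  // bytes. Non-top frames substitute (temps - callee_params + callee_locals)
  // slots for max_stack(), since only the topmost frame keeps a full-sized
  // expression stack.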
  // If we actually have a frame to layout we must now fill in all the pieces.
  if (interpreter_frame != NULL) {

    intptr_t sp = (intptr_t)interpreter_frame->sp();
    intptr_t fp = *(intptr_t *)sp;
    assert(fp == (intptr_t)caller->sp(), "fp must match");
    interpreterState cur_state =
      (interpreterState)(fp - frame::interpreter_frame_cinterpreterstate_size_in_bytes());

    // Now fill in the interpreterState object.

    intptr_t* locals;
    if (caller->is_interpreted_frame()) {
      // Locals must agree with the caller because it will be used to set the
      // caller's tos when we return.
      interpreterState prev = caller->get_interpreterState();
      // Calculate the start of "locals" for MH calls. For MH calls, the
      // current method() (= MH target) and prev->callee() (=
      // MH.invoke*()) are different and especially have different
      // signatures. To pop the arguments of the caller, we must use
      // the prev->callee()->size_of_arguments() because that's what
      // the caller actually pushed. Currently, for synthetic MH
      // calls (deoptimized from inlined MH calls), detected by
      // is_method_handle_invoke(), we use the callee's arguments
      // because here, the caller's and callee's signature match.
      if (true /*!caller->is_at_mh_callsite()*/) {
        locals = prev->stack() + method->size_of_parameters();
      } else {
        // Normal MH call.
        locals = prev->stack() + prev->callee()->size_of_parameters();
      }
    } else {
      locals = (intptr_t*) (fp + ((method->max_locals() - 1) * BytesPerWord) +
                            frame::parent_ijava_frame_abi_size);
    }

    intptr_t* monitor_base = (intptr_t*) cur_state;
    intptr_t* stack_base   = (intptr_t*) ((intptr_t) monitor_base - monitor_size);

    // Provide pop_frame capability on PPC64, add popframe_args.
    // +1 because stack is always prepushed.
    intptr_t* stack = (intptr_t*) ((intptr_t) stack_base -
                                   (temps + popframe_args + 1) * BytesPerWord);

    BytecodeInterpreter::layout_interpreterState(cur_state,
                                                 caller,
                                                 interpreter_frame,
                                                 method,
                                                 locals,
                                                 stack,
                                                 stack_base,
                                                 monitor_base,
                                                 (intptr_t*)(((intptr_t)fp) - top_frame_size),
                                                 is_top_frame);

    BytecodeInterpreter::pd_layout_interpreterState(cur_state,
                                                    interpreter_return_address,
                                                    interpreter_frame->fp());
  }
  return frame_size / BytesPerWord;
}

#endif // CC_INTERP