From 8f491dd925fe954918a6f40f36dce1cad9363d47 Mon Sep 17 00:00:00 2001 From: simonis Date: Wed, 27 Jan 2016 15:01:46 +0100 Subject: [PATCH] 8148487: PPC64: Better byte behavior Reviewed-by: goetz, mdoerr --- src/cpu/ppc/vm/interp_masm_ppc_64.cpp | 40 +++++++++++++++ src/cpu/ppc/vm/interp_masm_ppc_64.hpp | 2 + src/cpu/ppc/vm/interpreter_ppc.cpp | 10 ++++ src/cpu/ppc/vm/templateInterpreter_ppc.cpp | 12 ++++- src/cpu/ppc/vm/templateTable_ppc_64.cpp | 60 ++++++++++++++++++++-- 5 files changed, 118 insertions(+), 6 deletions(-) diff --git a/src/cpu/ppc/vm/interp_masm_ppc_64.cpp b/src/cpu/ppc/vm/interp_masm_ppc_64.cpp index 0d39104ac..44763647c 100644 --- a/src/cpu/ppc/vm/interp_masm_ppc_64.cpp +++ b/src/cpu/ppc/vm/interp_masm_ppc_64.cpp @@ -174,6 +174,7 @@ void InterpreterMacroAssembler::load_earlyret_value(TosState state, Register Rsc case ltos: ld(R17_tos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState); break; case btos: // fall through + case ztos: // fall through case ctos: // fall through case stos: // fall through case itos: lwz(R17_tos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState); @@ -302,6 +303,7 @@ void InterpreterMacroAssembler::push(TosState state) { switch (state) { case atos: push_ptr(); break; case btos: + case ztos: case ctos: case stos: case itos: push_i(); break; @@ -317,6 +319,7 @@ void InterpreterMacroAssembler::pop(TosState state) { switch (state) { case atos: pop_ptr(); break; case btos: + case ztos: case ctos: case stos: case itos: pop_i(); break; @@ -751,6 +754,43 @@ void InterpreterMacroAssembler::merge_frames(Register Rsender_sp, Register retur stdux(Rscratch2, R1_SP, Rscratch1); // atomically set *(SP = top_frame_sp) = **SP } +void InterpreterMacroAssembler::narrow(Register result) { + Register ret_type = R11_scratch1; + ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method); + lbz(ret_type, in_bytes(ConstMethod::result_type_offset()), R11_scratch1); + + Label notBool, notByte, notChar, done; + + // common case first + cmpwi(CCR0, ret_type, T_INT); + beq(CCR0, done); + + cmpwi(CCR0, ret_type, T_BOOLEAN); + bne(CCR0, notBool); + andi(result, result, 0x1); + b(done); + + bind(notBool); + cmpwi(CCR0, ret_type, T_BYTE); + bne(CCR0, notByte); + extsb(result, result); + b(done); + + bind(notByte); + cmpwi(CCR0, ret_type, T_CHAR); + bne(CCR0, notChar); + andi(result, result, 0xffff); + b(done); + + bind(notChar); + // cmpwi(CCR0, ret_type, T_SHORT); // all that's left + // bne(CCR0, done); + extsh(result, result); + + // Nothing to do for T_INT + bind(done); +} + // Remove activation. // // Unlock the receiver if this is a synchronized method. diff --git a/src/cpu/ppc/vm/interp_masm_ppc_64.hpp b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp index 555cc5ee9..4b9916ca5 100644 --- a/src/cpu/ppc/vm/interp_masm_ppc_64.hpp +++ b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp @@ -149,6 +149,8 @@ class InterpreterMacroAssembler: public MacroAssembler { void get_cpool_and_tags(Register Rcpool, Register Rtags); void is_a(Label& L); + void narrow(Register result); + // Java Call Helpers void call_from_interpreter(Register Rtarget_method, Register Rret_addr, Register Rscratch1, Register Rscratch2); diff --git a/src/cpu/ppc/vm/interpreter_ppc.cpp b/src/cpu/ppc/vm/interpreter_ppc.cpp index 12b271a81..22cc339ad 100644 --- a/src/cpu/ppc/vm/interpreter_ppc.cpp +++ b/src/cpu/ppc/vm/interpreter_ppc.cpp @@ -632,6 +632,16 @@ address InterpreterGenerator::generate_accessor_entry(void) { __ blr(); } + if (branch_table[ztos] == 0) { // generate only once + __ align(32, 28, 28); // align load + __ fence(); // volatile entry point (one instruction before non-volatile_entry point) + branch_table[ztos] = __ pc(); // non-volatile_entry point + __ lbzx(R3_RET, Rclass_or_obj, Roffset); + __ extsb(R3_RET, R3_RET); + __ beq(CCR6, Lacquire); + __ blr(); + } + if (branch_table[ctos] == 0) { // generate only once __ align(32, 28, 28); // align load __ fence(); // volatile entry point (one instruction before non-volatile_entry point) diff --git a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp index a8d19d286..de148ae39 100644 --- a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp +++ b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp @@ -154,6 +154,7 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, switch (state) { case ltos: case btos: + case ztos: case ctos: case stos: case atos: @@ -200,6 +201,7 @@ address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, i switch (state) { case ltos: case btos: + case ztos: case ctos: case stos: case atos: @@ -1642,12 +1644,14 @@ address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state // Copied from TemplateTable::_return. // Restoration of lr done by remove_activation. switch (state) { + // Narrow result if state is itos but result type is smaller. + case itos: __ narrow(R17_tos); /* fall through */ case ltos: case btos: + case ztos: case ctos: case stos: - case atos: - case itos: __ mr(R3_RET, R17_tos); break; + case atos: __ mr(R3_RET, R17_tos); break; case ftos: case dtos: __ fmr(F1_RET, F15_ftos); break; case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need @@ -1717,6 +1721,10 @@ address TemplateInterpreterGenerator::generate_trace_code(TosState state) { bname = "trace_code_btos {"; tsize = 2; break; + case ztos: + bname = "trace_code_ztos {"; + tsize = 2; + break; case ctos: bname = "trace_code_ctos {"; tsize = 2; diff --git a/src/cpu/ppc/vm/templateTable_ppc_64.cpp b/src/cpu/ppc/vm/templateTable_ppc_64.cpp index c4cc257f2..ee7b925bc 100644 --- a/src/cpu/ppc/vm/templateTable_ppc_64.cpp +++ b/src/cpu/ppc/vm/templateTable_ppc_64.cpp @@ -173,6 +173,7 @@ void TemplateTable::patch_bytecode(Bytecodes::Code new_bc, Register Rnew_bc, Reg switch (new_bc) { case Bytecodes::_fast_aputfield: case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: case Bytecodes::_fast_cputfield: case Bytecodes::_fast_dputfield: case Bytecodes::_fast_fputfield: @@ -969,9 +970,21 @@ void TemplateTable::bastore() { Rarray = R12_scratch2, Rscratch = R3_ARG1; __ pop_i(Rindex); + __ pop_ptr(Rarray); // tos: val - // Rarray: array ptr (popped by index_check) - __ index_check(Rarray, Rindex, 0, Rscratch, Rarray); + + // Need to check whether array is boolean or byte + // since both types share the bastore bytecode. + __ load_klass(Rscratch, Rarray); + __ lwz(Rscratch, in_bytes(Klass::layout_helper_offset()), Rscratch); + int diffbit = exact_log2(Klass::layout_helper_boolean_diffbit()); + __ testbitdi(CCR0, R0, Rscratch, diffbit); + Label L_skip; + __ bfalse(CCR0, L_skip); + __ andi(R17_tos, R17_tos, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1 + __ bind(L_skip); + + __ index_check_without_pop(Rarray, Rindex, 0, Rscratch, Rarray); __ stb(R17_tos, arrayOopDesc::base_offset_in_bytes(T_BYTE), Rarray); } @@ -2100,12 +2113,16 @@ void TemplateTable::_return(TosState state) { __ remove_activation(state, /* throw_monitor_exception */ true); // Restoration of lr done by remove_activation. switch (state) { + // Narrow result if state is itos but result type is smaller. + // Need to narrow in the return bytecode rather than in generate_return_entry + // since compiled code callers expect the result to already be narrowed. + case itos: __ narrow(R17_tos); /* fall through */ case ltos: case btos: + case ztos: case ctos: case stos: - case atos: - case itos: __ mr(R3_RET, R17_tos); break; + case atos: __ mr(R3_RET, R17_tos); break; case ftos: case dtos: __ fmr(F1_RET, F15_ftos); break; case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need @@ -2500,6 +2517,21 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static) { __ beq(CCR6, Lacquire); // Volatile? __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); + __ align(32, 28, 28); // Align load. + // __ bind(Lztos); (same code as btos) + __ fence(); // Volatile entry point (one instruction before non-volatile_entry point). + assert(branch_table[ztos] == 0, "can't compute twice"); + branch_table[ztos] = __ pc(); // non-volatile_entry point + __ lbzx(R17_tos, Rclass_or_obj, Roffset); + __ extsb(R17_tos, R17_tos); + __ push(ztos); + if (!is_static) { + // use btos rewriting, no truncating to t/f bit is needed for getfield. + patch_bytecode(Bytecodes::_fast_bgetfield, Rbc, Rscratch); + } + __ beq(CCR6, Lacquire); // Volatile? + __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); + __ align(32, 28, 28); // Align load. // __ bind(Lctos); __ fence(); // Volatile entry point (one instruction before non-volatile_entry point). @@ -2590,6 +2622,7 @@ void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rscratch, boo case Bytecodes::_fast_aputfield: __ push_ptr(); offs+= Interpreter::stackElementSize; break; case Bytecodes::_fast_iputfield: // Fall through case Bytecodes::_fast_bputfield: // Fall through + case Bytecodes::_fast_zputfield: // Fall through case Bytecodes::_fast_cputfield: // Fall through case Bytecodes::_fast_sputfield: __ push_i(); offs+= Interpreter::stackElementSize; break; case Bytecodes::_fast_lputfield: __ push_l(); offs+=2*Interpreter::stackElementSize; break; @@ -2630,6 +2663,7 @@ void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rscratch, boo case Bytecodes::_fast_aputfield: __ pop_ptr(); break; case Bytecodes::_fast_iputfield: // Fall through case Bytecodes::_fast_bputfield: // Fall through + case Bytecodes::_fast_zputfield: // Fall through case Bytecodes::_fast_cputfield: // Fall through case Bytecodes::_fast_sputfield: __ pop_i(); break; case Bytecodes::_fast_lputfield: __ pop_l(); break; @@ -2780,6 +2814,21 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static) { } __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); + __ align(32, 28, 28); // Align pop. + // __ bind(Lztos); + __ release(); // Volatile entry point (one instruction before non-volatile_entry point). + assert(branch_table[ztos] == 0, "can't compute twice"); + branch_table[ztos] = __ pc(); // non-volatile_entry point + __ pop(ztos); + if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1. + __ andi(R17_tos, R17_tos, 0x1); + __ stbx(R17_tos, Rclass_or_obj, Roffset); + if (!is_static) { patch_bytecode(Bytecodes::_fast_zputfield, Rbc, Rscratch, true, byte_no); } + if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ beq(CR_is_vol, Lvolatile); // Volatile? + } + __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); + __ align(32, 28, 28); // Align pop. // __ bind(Lctos); __ release(); // Volatile entry point (one instruction before non-volatile_entry point). @@ -2895,6 +2944,9 @@ void TemplateTable::fast_storefield(TosState state) { __ stdx(R17_tos, Rclass_or_obj, Roffset); break; + case Bytecodes::_fast_zputfield: + __ andi(R17_tos, R17_tos, 0x1); // boolean is true if LSB is 1 + // fall through to bputfield case Bytecodes::_fast_bputfield: __ stbx(R17_tos, Rclass_or_obj, Roffset); break; -- GitLab