diff --git a/src/cpu/sparc/vm/assembler_sparc.cpp b/src/cpu/sparc/vm/assembler_sparc.cpp index 998f39508e74601551b17fcb015108f7c1331a2a..07879e32a8bb9f0fb0a9214e64ac6b7b7b629e90 100644 --- a/src/cpu/sparc/vm/assembler_sparc.cpp +++ b/src/cpu/sparc/vm/assembler_sparc.cpp @@ -2615,7 +2615,8 @@ void MacroAssembler::cas_under_lock(Register top_ptr_reg, Register top_reg, Regi } } -void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg, +void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, + Register temp_reg, Label& done, Label* slow_case, BiasedLockingCounters* counters) { assert(UseBiasedLocking, "why call this otherwise?"); @@ -2691,8 +2692,7 @@ void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, R markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place, mark_reg); or3(G2_thread, mark_reg, temp_reg); - casx_under_lock(mark_addr.base(), mark_reg, temp_reg, - (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); + casn(mark_addr.base(), mark_reg, temp_reg); // If the biasing toward our thread failed, this means that // another thread succeeded in biasing it toward itself and we // need to revoke that bias. The revocation will occur in the @@ -2721,8 +2721,7 @@ void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, R load_klass(obj_reg, temp_reg); ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); or3(G2_thread, temp_reg, temp_reg); - casx_under_lock(mark_addr.base(), mark_reg, temp_reg, - (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); + casn(mark_addr.base(), mark_reg, temp_reg); // If the biasing toward our thread failed, this means that // another thread succeeded in biasing it toward itself and we // need to revoke that bias. The revocation will occur in the @@ -2752,8 +2751,7 @@ void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, R // bits in this situation. Should attempt to preserve them. load_klass(obj_reg, temp_reg); ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); - casx_under_lock(mark_addr.base(), mark_reg, temp_reg, - (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); + casn(mark_addr.base(), mark_reg, temp_reg); // Fall through to the normal CAS-based lock, because no matter what // the result of the above CAS, some thread must have succeeded in // removing the bias bit from the object's header. @@ -2815,8 +2813,10 @@ void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg // effect). -void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark, Register Rbox, Register Rscratch, - BiasedLockingCounters* counters) { +void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark, + Register Rbox, Register Rscratch, + BiasedLockingCounters* counters, + bool try_bias) { Address mark_addr(Roop, 0, oopDesc::mark_offset_in_bytes()); verify_oop(Roop); @@ -2838,7 +2838,7 @@ void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark, Registe // Fetch object's markword ld_ptr(mark_addr, Rmark); - if (UseBiasedLocking) { + if (try_bias) { biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); } @@ -2881,7 +2881,7 @@ void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark, Registe ld_ptr (mark_addr, Rmark); // fetch obj->mark // Triage: biased, stack-locked, neutral, inflated - if (UseBiasedLocking) { + if (try_bias) { biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); // Invariant: if control reaches this point in the emitted stream // then Rmark has not been modified. @@ -2945,7 +2945,7 @@ void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark, Registe ld_ptr (mark_addr, Rmark); // fetch obj->mark // Triage: biased, stack-locked, neutral, inflated - if (UseBiasedLocking) { + if (try_bias) { biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); // Invariant: if control reaches this point in the emitted stream // then Rmark has not been modified. @@ -3039,7 +3039,9 @@ void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark, Registe bind (done) ; } -void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, Register Rbox, Register Rscratch) { +void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, + Register Rbox, Register Rscratch, + bool try_bias) { Address mark_addr(Roop, 0, oopDesc::mark_offset_in_bytes()); Label done ; @@ -3050,7 +3052,7 @@ void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, Regis } if (EmitSync & 8) { - if (UseBiasedLocking) { + if (try_bias) { biased_locking_exit(mark_addr, Rscratch, done); } @@ -3077,7 +3079,7 @@ void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, Regis // I$ effects. Label LStacked ; - if (UseBiasedLocking) { + if (try_bias) { // TODO: eliminate redundant LDs of obj->mark biased_locking_exit(mark_addr, Rscratch, done); } diff --git a/src/cpu/sparc/vm/assembler_sparc.hpp b/src/cpu/sparc/vm/assembler_sparc.hpp index 3cea450cc2c6bb4c7d6c7c5a8a6849031579fe2b..1d735ade1748afe8e232f9f16db118be91130f97 100644 --- a/src/cpu/sparc/vm/assembler_sparc.hpp +++ b/src/cpu/sparc/vm/assembler_sparc.hpp @@ -2220,9 +2220,13 @@ class MacroAssembler: public Assembler { // These set the icc condition code to equal if the lock succeeded // and notEqual if it failed and requires a slow case - void compiler_lock_object(Register Roop, Register Rmark, Register Rbox, Register Rscratch, - BiasedLockingCounters* counters = NULL); - void compiler_unlock_object(Register Roop, Register Rmark, Register Rbox, Register Rscratch); + void compiler_lock_object(Register Roop, Register Rmark, Register Rbox, + Register Rscratch, + BiasedLockingCounters* counters = NULL, + bool try_bias = UseBiasedLocking); + void compiler_unlock_object(Register Roop, Register Rmark, Register Rbox, + Register Rscratch, + bool try_bias = UseBiasedLocking); // Biased locking support // Upon entry, lock_reg must point to the lock record on the stack, diff --git a/src/cpu/sparc/vm/sparc.ad b/src/cpu/sparc/vm/sparc.ad index 145a1564bb399814610d7f2b2aa49e324d0fa41a..06af021d1fd21c20a870d954e262b3cfddbf8f4e 100644 --- a/src/cpu/sparc/vm/sparc.ad +++ b/src/cpu/sparc/vm/sparc.ad @@ -395,6 +395,7 @@ reg_class long_reg( R_G1H,R_G1, R_G3H,R_G3, R_G4H,R_G4, ); reg_class g1_regL(R_G1H,R_G1); +reg_class g3_regL(R_G3H,R_G3); reg_class o2_regL(R_O2H,R_O2); reg_class o7_regL(R_O7H,R_O7); @@ -2688,7 +2689,7 @@ enc_class Fast_Lock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{ assert(Rbox != Rscratch, ""); assert(Rbox != Rmark, ""); - __ compiler_lock_object(Roop, Rmark, Rbox, Rscratch, _counters); + __ compiler_lock_object(Roop, Rmark, Rbox, Rscratch, _counters, UseBiasedLocking && !UseOptoBiasInlining); %} enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{ @@ -2704,7 +2705,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{ assert(Rbox != Rscratch, ""); assert(Rbox != Rmark, ""); - __ compiler_unlock_object(Roop, Rmark, Rbox, Rscratch); + __ compiler_unlock_object(Roop, Rmark, Rbox, Rscratch, UseBiasedLocking && !UseOptoBiasInlining); %} enc_class enc_cas( iRegP mem, iRegP old, iRegP new ) %{ @@ -2716,8 +2717,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{ // casx_under_lock picks 1 of 3 encodings: // For 32-bit pointers you get a 32-bit CAS // For 64-bit pointers you get a 64-bit CASX - __ casx_under_lock(Rmem, Rold, Rnew, // Swap(*Rmem,Rnew) if *Rmem == Rold - (address) StubRoutines::Sparc::atomic_memory_operation_lock_addr()); + __ casn(Rmem, Rold, Rnew); // Swap(*Rmem,Rnew) if *Rmem == Rold __ cmp( Rold, Rnew ); %} @@ -3766,6 +3766,14 @@ operand g1RegL() %{ interface(REG_INTER); %} +operand g3RegL() %{ + constraint(ALLOC_IN_RC(g3_regL)); + match(iRegL); + + format %{ %} + interface(REG_INTER); +%} + // Int Register safe // This is 64bit safe operand iRegIsafe() %{ @@ -6602,32 +6610,23 @@ instruct storePConditional( iRegP heap_top_ptr, iRegP oldval, g3RegP newval, fla ins_pipe( long_memory_op ); %} -instruct storeLConditional_bool(iRegP mem_ptr, iRegL oldval, iRegL newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{ - match(Set res (StoreLConditional mem_ptr (Binary oldval newval))); - effect( USE mem_ptr, KILL ccr, KILL tmp1); - // Marshal the register pairs into V9 64-bit registers, then do the compare-and-swap - format %{ - "MOV $newval,R_O7\n\t" - "CASXA [$mem_ptr],$oldval,R_O7\t! If $oldval==[$mem_ptr] Then store R_O7 into [$mem_ptr], set R_O7=[$mem_ptr] in any case\n\t" - "CMP $oldval,R_O7\t\t! See if we made progress\n\t" - "MOV 1,$res\n\t" - "MOVne xcc,R_G0,$res" - %} - ins_encode( enc_casx(mem_ptr, oldval, newval), - enc_lflags_ne_to_boolean(res) ); +// Conditional-store of an int value. +instruct storeIConditional( iRegP mem_ptr, iRegI oldval, g3RegI newval, flagsReg icc ) %{ + match(Set icc (StoreIConditional mem_ptr (Binary oldval newval))); + effect( KILL newval ); + format %{ "CASA [$mem_ptr],$oldval,$newval\t! If $oldval==[$mem_ptr] Then store $newval into [$mem_ptr], set $newval=[$mem_ptr] in any case\n\t" + "CMP $oldval,$newval\t\t! See if we made progress" %} + ins_encode( enc_cas(mem_ptr,oldval,newval) ); ins_pipe( long_memory_op ); %} -instruct storeLConditional_flags(iRegP mem_ptr, iRegL oldval, iRegL newval, flagsRegL xcc, o7RegI tmp1, immI0 zero) %{ - match(Set xcc (CmpI (StoreLConditional mem_ptr (Binary oldval newval)) zero)); - effect( USE mem_ptr, KILL tmp1); - // Marshal the register pairs into V9 64-bit registers, then do the compare-and-swap - format %{ - "MOV $newval,R_O7\n\t" - "CASXA [$mem_ptr],$oldval,R_O7\t! If $oldval==[$mem_ptr] Then store R_O7 into [$mem_ptr], set R_O7=[$mem_ptr] in any case\n\t" - "CMP $oldval,R_O7\t\t! See if we made progress" - %} - ins_encode( enc_casx(mem_ptr, oldval, newval)); +// Conditional-store of a long value. +instruct storeLConditional( iRegP mem_ptr, iRegL oldval, g3RegL newval, flagsRegL xcc ) %{ + match(Set xcc (StoreLConditional mem_ptr (Binary oldval newval))); + effect( KILL newval ); + format %{ "CASXA [$mem_ptr],$oldval,$newval\t! If $oldval==[$mem_ptr] Then store $newval into [$mem_ptr], set $newval=[$mem_ptr] in any case\n\t" + "CMP $oldval,$newval\t\t! See if we made progress" %} + ins_encode( enc_cas(mem_ptr,oldval,newval) ); ins_pipe( long_memory_op ); %} @@ -7410,6 +7409,34 @@ instruct orL_reg_imm13(iRegL dst, iRegL src1, immL13 con) %{ ins_pipe(ialu_reg_imm); %} +#ifndef _LP64 + +// Use sp_ptr_RegP to match G2 (TLS register) without spilling. +instruct orI_reg_castP2X(iRegI dst, iRegI src1, sp_ptr_RegP src2) %{ + match(Set dst (OrI src1 (CastP2X src2))); + + size(4); + format %{ "OR $src1,$src2,$dst" %} + opcode(Assembler::or_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +#else + +instruct orL_reg_castP2X(iRegL dst, iRegL src1, sp_ptr_RegP src2) %{ + match(Set dst (OrL src1 (CastP2X src2))); + + ins_cost(DEFAULT_COST); + size(4); + format %{ "OR $src1,$src2,$dst\t! long" %} + opcode(Assembler::or_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +#endif + // Xor Instructions // Register Xor instruct xorI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp index ec208540ad012c553b437fa98d0c0c9dbc408e88..edf0d3dbcafe44c7375fbbfc0b4ef1c04fb2ca9a 100644 --- a/src/cpu/x86/vm/assembler_x86.cpp +++ b/src/cpu/x86/vm/assembler_x86.cpp @@ -621,6 +621,10 @@ address Assembler::locate_operand(address inst, WhichOperand which) { debug_only(has_disp32 = true); break; + case 0xF0: // Lock + assert(os::is_MP(), "only on MP"); + goto again_after_prefix; + case 0xF3: // For SSE case 0xF2: // For SSE2 switch (0xFF & *ip++) { diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp index c2b64771803178b502767b5209dc292a24e25aa7..32cb356a1a1c5e74ab2cf06ce5d4b04614e59e53 100644 --- a/src/cpu/x86/vm/assembler_x86.hpp +++ b/src/cpu/x86/vm/assembler_x86.hpp @@ -1780,7 +1780,8 @@ class MacroAssembler: public Assembler { // check info (currently consumed only by C1). If // swap_reg_contains_mark is true then returns -1 as it is assumed // the calling code has already passed any potential faults. - int biased_locking_enter(Register lock_reg, Register obj_reg, Register swap_reg, Register tmp_reg, + int biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, bool swap_reg_contains_mark, Label& done, Label* slow_case = NULL, BiasedLockingCounters* counters = NULL); diff --git a/src/cpu/x86/vm/x86_32.ad b/src/cpu/x86/vm/x86_32.ad index 30e5fc979c395ace9135dcfd0af69b12530bc305..5f6f8724adb29014301d5c5747a58d5328187391 100644 --- a/src/cpu/x86/vm/x86_32.ad +++ b/src/cpu/x86/vm/x86_32.ad @@ -3313,7 +3313,7 @@ encode %{ // Beware -- there's a subtle invariant that fetch of the markword // at [FETCH], below, will never observe a biased encoding (*101b). // If this invariant is not held we risk exclusion (safety) failure. - if (UseBiasedLocking) { + if (UseBiasedLocking && !UseOptoBiasInlining) { masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters); } @@ -3534,7 +3534,7 @@ encode %{ // Critically, the biased locking test must have precedence over // and appear before the (box->dhw == 0) recursive stack-lock test. - if (UseBiasedLocking) { + if (UseBiasedLocking && !UseOptoBiasInlining) { masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); } @@ -7930,33 +7930,36 @@ instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, e ins_pipe( pipe_cmpxchg ); %} -// Conditional-store of a long value -// Returns a boolean value (0/1) on success. Implemented with a CMPXCHG8 on Intel. -// mem_ptr can actually be in either ESI or EDI -instruct storeLConditional( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ - match(Set res (StoreLConditional mem_ptr (Binary oldval newval))); - effect(KILL cr); - // EDX:EAX is killed if there is contention, but then it's also unused. - // In the common case of no contention, EDX:EAX holds the new oop address. - format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" - "MOV $res,0\n\t" - "JNE,s fail\n\t" - "MOV $res,1\n" - "fail:" %} - ins_encode( enc_cmpxchg8(mem_ptr), - enc_flags_ne_to_boolean(res) ); +// Conditional-store of an int value. +// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. +instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{ + match(Set cr (StoreIConditional mem (Binary oldval newval))); + effect(KILL oldval); + format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} + ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); ins_pipe( pipe_cmpxchg ); %} -// Conditional-store of a long value -// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. -// mem_ptr can actually be in either ESI or EDI -instruct storeLConditional_flags( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr, immI0 zero ) %{ - match(Set cr (CmpI (StoreLConditional mem_ptr (Binary oldval newval)) zero)); - // EDX:EAX is killed if there is contention, but then it's also unused. - // In the common case of no contention, EDX:EAX holds the new oop address. - format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} - ins_encode( enc_cmpxchg8(mem_ptr) ); +// Conditional-store of a long value. +// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. +instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); + effect(KILL oldval); + format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t" + "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t" + "XCHG EBX,ECX" + %} + ins_encode %{ + // Note: we need to swap rbx, and rcx before and after the + // cmpxchg8 instruction because the instruction uses + // rcx as the high order word of the new value to store but + // our register encoding uses rbx. + __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); + if( os::is_MP() ) + __ lock(); + __ cmpxchg8(Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp)); + __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); + %} ins_pipe( pipe_cmpxchg ); %} @@ -8423,6 +8426,7 @@ instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{ ins_pipe( ialu_reg ); %} + // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. // This idiom is used by the compiler for the i2b bytecode. instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour, eFlagsReg cr) %{ @@ -8540,6 +8544,18 @@ instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ ins_pipe( ialu_reg_reg ); %} +instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{ + match(Set dst (OrI dst (CastP2X src))); + effect(KILL cr); + + size(2); + format %{ "OR $dst,$src" %} + opcode(0x0B); + ins_encode( OpcP, RegReg( dst, src) ); + ins_pipe( ialu_reg_reg ); +%} + + // Or Register with Immediate instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (OrI dst src)); diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad index 32a7ee8f051bf31e334ff69659895514f8177cb9..9b5621f39030aef78f69f2a0750f0dfa90d5f5d6 100644 --- a/src/cpu/x86/vm/x86_64.ad +++ b/src/cpu/x86/vm/x86_64.ad @@ -3572,7 +3572,7 @@ encode %{ // at [FETCH], below, will never observe a biased encoding (*101b). // If this invariant is not held we'll suffer exclusion (safety) failure. - if (UseBiasedLocking) { + if (UseBiasedLocking && !UseOptoBiasInlining) { masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters); masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH] } @@ -3660,7 +3660,7 @@ encode %{ } else { Label DONE_LABEL, Stacked, CheckSucc ; - if (UseBiasedLocking) { + if (UseBiasedLocking && !UseOptoBiasInlining) { masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); } @@ -7845,7 +7845,7 @@ instruct storePConditional(memory heap_top_ptr, rFlagsReg cr) %{ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); - + format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) " "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %} opcode(0x0F, 0xB1); @@ -7856,53 +7856,40 @@ instruct storePConditional(memory heap_top_ptr, ins_pipe(pipe_cmpxchg); %} -// Conditional-store of a long value -// Returns a boolean value (0/1) on success. Implemented with a -// CMPXCHG8 on Intel. mem_ptr can actually be in either RSI or RDI - -instruct storeLConditional(rRegI res, - memory mem_ptr, - rax_RegL oldval, rRegL newval, - rFlagsReg cr) +// Conditional-store of an int value. +// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. +instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{ - match(Set res (StoreLConditional mem_ptr (Binary oldval newval))); - effect(KILL cr); + match(Set cr (StoreIConditional mem (Binary oldval newval))); + effect(KILL oldval); - format %{ "cmpxchgq $mem_ptr, $newval\t# (long) " - "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" - "sete $res\n\t" - "movzbl $res, $res" %} + format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %} opcode(0x0F, 0xB1); ins_encode(lock_prefix, - REX_reg_mem_wide(newval, mem_ptr), + REX_reg_mem(newval, mem), OpcP, OpcS, - reg_mem(newval, mem_ptr), - REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete - REX_reg_breg(res, res), // movzbl - Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); + reg_mem(newval, mem)); ins_pipe(pipe_cmpxchg); %} -// Conditional-store of a long value -// ZF flag is set on success, reset otherwise. Implemented with a -// CMPXCHG8 on Intel. mem_ptr can actually be in either RSI or RDI -instruct storeLConditional_flags(memory mem_ptr, - rax_RegL oldval, rRegL newval, - rFlagsReg cr, - immI0 zero) +// Conditional-store of a long value. +// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. +instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{ - match(Set cr (CmpI (StoreLConditional mem_ptr (Binary oldval newval)) zero)); + match(Set cr (StoreLConditional mem (Binary oldval newval))); + effect(KILL oldval); - format %{ "cmpxchgq $mem_ptr, $newval\t# (long) " - "If rax == $mem_ptr then store $newval into $mem_ptr" %} + format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %} opcode(0x0F, 0xB1); ins_encode(lock_prefix, - REX_reg_mem_wide(newval, mem_ptr), + REX_reg_mem_wide(newval, mem), OpcP, OpcS, - reg_mem(newval, mem_ptr)); + reg_mem(newval, mem)); ins_pipe(pipe_cmpxchg); %} + +// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them instruct compareAndSwapP(rRegI res, memory mem_ptr, rax_RegP oldval, rRegP newval, @@ -7926,7 +7913,6 @@ instruct compareAndSwapP(rRegI res, ins_pipe( pipe_cmpxchg ); %} -// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them instruct compareAndSwapL(rRegI res, memory mem_ptr, rax_RegL oldval, rRegL newval, @@ -8876,6 +8862,7 @@ instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr) ins_pipe(ialu_reg); %} + // Logical Shift Right by 8-bit immediate instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{ @@ -9585,6 +9572,18 @@ instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) ins_pipe(ialu_reg_reg); %} +// Use any_RegP to match R15 (TLS register) without spilling. +instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{ + match(Set dst (OrL dst (CastP2X src))); + effect(KILL cr); + + format %{ "orq $dst, $src\t# long" %} + opcode(0x0B); + ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); + ins_pipe(ialu_reg_reg); +%} + + // Or Register with Immediate instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{ diff --git a/src/os_cpu/linux_x86/vm/linux_x86_32.ad b/src/os_cpu/linux_x86/vm/linux_x86_32.ad index ec4725a1cbcd0777ec06c1394939f61d1510476d..ed92526efe4e2d1fe8405398d5f00c49e3b3db5c 100644 --- a/src/os_cpu/linux_x86/vm/linux_x86_32.ad +++ b/src/os_cpu/linux_x86/vm/linux_x86_32.ad @@ -103,16 +103,16 @@ encode %{ // This name is KNOWN by the ADLC and cannot be changed. // The ADLC forces a 'TypeRawPtr::BOTTOM' output type // for this guy. -instruct tlsLoadP(eAXRegP dst, eFlagsReg cr) %{ +instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{ match(Set dst (ThreadLocal)); effect(DEF dst, KILL cr); - format %{ "MOV EAX, Thread::current()" %} + format %{ "MOV $dst, Thread::current()" %} ins_encode( linux_tlsencode(dst) ); ins_pipe( ialu_reg_fat ); %} -instruct TLS(eAXRegP dst) %{ +instruct TLS(eRegP dst) %{ match(Set dst (ThreadLocal)); expand %{ diff --git a/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad b/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad index 911a8ad7d13ecf8e0c1ecfb2fb1e5e79abc4f560..3f1110606ea19b250f81d4dc8c2846af7f0dee58 100644 --- a/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad +++ b/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad @@ -110,16 +110,16 @@ encode %{ // This name is KNOWN by the ADLC and cannot be changed. // The ADLC forces a 'TypeRawPtr::BOTTOM' output type // for this guy. -instruct tlsLoadP(eAXRegP dst, eFlagsReg cr) %{ +instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{ match(Set dst (ThreadLocal)); effect(DEF dst, KILL cr); - format %{ "MOV EAX, Thread::current()" %} + format %{ "MOV $dst, Thread::current()" %} ins_encode( solaris_tlsencode(dst) ); ins_pipe( ialu_reg_fat ); %} -instruct TLS(eAXRegP dst) %{ +instruct TLS(eRegP dst) %{ match(Set dst (ThreadLocal)); expand %{ diff --git a/src/share/vm/adlc/formssel.cpp b/src/share/vm/adlc/formssel.cpp index 1146b57aeb45b0bc87acdc6952605ed5adecb275..fb91d83d2f6ccc126113d8d085587e5ee693a1c6 100644 --- a/src/share/vm/adlc/formssel.cpp +++ b/src/share/vm/adlc/formssel.cpp @@ -3324,7 +3324,7 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const { "Load8B" ,"Load4B" ,"Load8C" ,"Load4C" ,"Load2C" ,"Load8S", "Load4S","Load2S", "LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned", "LoadPLocked", "LoadLLocked", - "StorePConditional", "StoreLConditional", + "StorePConditional", "StoreIConditional", "StoreLConditional", "CompareAndSwapI", "CompareAndSwapL", "CompareAndSwapP", "CompareAndSwapN", "StoreCM", "ClearArray" diff --git a/src/share/vm/opto/c2_globals.hpp b/src/share/vm/opto/c2_globals.hpp index ab960fdc8e59a788e4ae433ad46597107e129ab6..1342091350cdd7df6533b8e5aa8e9a2c5732b8ff 100644 --- a/src/share/vm/opto/c2_globals.hpp +++ b/src/share/vm/opto/c2_globals.hpp @@ -388,6 +388,9 @@ product(intx, EliminateAllocationArraySizeLimit, 64, \ "Array size (number of elements) limit for scalar replacement") \ \ + product(bool, UseOptoBiasInlining, true, \ + "Generate biased locking code in C2 ideal graph") \ + \ product(intx, ValueSearchLimit, 1000, \ "Recursion limit in PhaseMacroExpand::value_from_mem_phi") \ \ diff --git a/src/share/vm/opto/callnode.cpp b/src/share/vm/opto/callnode.cpp index 8e8fc24da202f011ad2dce2be4e418e80da61815..b9e9cbd558748199d838934f890ed318e005c948 100644 --- a/src/share/vm/opto/callnode.cpp +++ b/src/share/vm/opto/callnode.cpp @@ -967,6 +967,7 @@ SafePointScalarObjectNode::SafePointScalarObjectNode(const TypeOopPtr* tp, init_class_id(Class_SafePointScalarObject); } +bool SafePointScalarObjectNode::pinned() const { return true; } uint SafePointScalarObjectNode::ideal_reg() const { return 0; // No matching to machine instruction diff --git a/src/share/vm/opto/callnode.hpp b/src/share/vm/opto/callnode.hpp index 20192ddf0f6231a4f8f1fa423f4272183e10b5f0..45b6dd2148bcee15c4b1450a19bd5c2eaacbb3b9 100644 --- a/src/share/vm/opto/callnode.hpp +++ b/src/share/vm/opto/callnode.hpp @@ -433,6 +433,10 @@ public: uint n_fields() const { return _n_fields; } DEBUG_ONLY(AllocateNode* alloc() const { return _alloc; }) + // SafePointScalarObject should be always pinned to the control edge + // of the SafePoint node for which it was generated. + virtual bool pinned() const; // { return true; } + virtual uint size_of() const { return sizeof(*this); } // Assumes that "this" is an argument to a safepoint node "s", and that diff --git a/src/share/vm/opto/classes.hpp b/src/share/vm/opto/classes.hpp index 0371736cf107e2948121eef9601838610f34aa66..14b431200598b0de7542fcd7d8e6a813dae42e09 100644 --- a/src/share/vm/opto/classes.hpp +++ b/src/share/vm/opto/classes.hpp @@ -205,6 +205,7 @@ macro(StoreB) macro(StoreC) macro(StoreCM) macro(StorePConditional) +macro(StoreIConditional) macro(StoreLConditional) macro(StoreD) macro(StoreF) diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp index df1a293f112e2e221967f78c63a55f7bbaa22080..42eae3063d91758337f835863df3548cbe1bd39b 100644 --- a/src/share/vm/opto/compile.cpp +++ b/src/share/vm/opto/compile.cpp @@ -2001,6 +2001,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { case Op_StorePConditional: case Op_StoreI: case Op_StoreL: + case Op_StoreIConditional: case Op_StoreLConditional: case Op_CompareAndSwapI: case Op_CompareAndSwapL: diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp index 52707d038b02568d526d2a6145164090e921ba49..17a5c1f79d17a1e1848230bc6b528c64959fe40f 100644 --- a/src/share/vm/opto/library_call.cpp +++ b/src/share/vm/opto/library_call.cpp @@ -3485,11 +3485,32 @@ bool LibraryCallKit::inline_native_AtomicLong_attemptUpdate() { const TypePtr *adr_type = _gvn.type(adr)->is_ptr(); int alias_idx = C->get_alias_index(adr_type); - Node *result = _gvn.transform(new (C, 5) StoreLConditionalNode(control(), memory(alias_idx), adr, newVal, oldVal)); - Node *store_proj = _gvn.transform( new (C, 1) SCMemProjNode(result)); + Node *cas = _gvn.transform(new (C, 5) StoreLConditionalNode(control(), memory(alias_idx), adr, newVal, oldVal)); + Node *store_proj = _gvn.transform( new (C, 1) SCMemProjNode(cas)); set_memory(store_proj, alias_idx); + Node *bol = _gvn.transform( new (C, 2) BoolNode( cas, BoolTest::eq ) ); - push(result); + Node *result; + // CMove node is not used to be able fold a possible check code + // after attemptUpdate() call. This code could be transformed + // into CMove node by loop optimizations. + { + RegionNode *r = new (C, 3) RegionNode(3); + result = new (C, 3) PhiNode(r, TypeInt::BOOL); + + Node *iff = create_and_xform_if(control(), bol, PROB_FAIR, COUNT_UNKNOWN); + Node *iftrue = opt_iff(r, iff); + r->init_req(1, iftrue); + result->init_req(1, intcon(1)); + result->init_req(2, intcon(0)); + + set_control(_gvn.transform(r)); + record_for_igvn(r); + + C->set_has_split_ifs(true); // Has chance for split-if optimization + } + + push(_gvn.transform(result)); return true; } diff --git a/src/share/vm/opto/loopTransform.cpp b/src/share/vm/opto/loopTransform.cpp index f1c15b08344bb89896ae2add99a2a21e74edf332..caa676f6ffd2e05c0b6073a5c83e874e44ad11ba 100644 --- a/src/share/vm/opto/loopTransform.cpp +++ b/src/share/vm/opto/loopTransform.cpp @@ -1519,6 +1519,7 @@ void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) { Node *bol = iff->in(1); if( bol && bol->req() > 1 && bol->in(1) && ((bol->in(1)->Opcode() == Op_StorePConditional ) || + (bol->in(1)->Opcode() == Op_StoreIConditional ) || (bol->in(1)->Opcode() == Op_StoreLConditional ) || (bol->in(1)->Opcode() == Op_CompareAndSwapI ) || (bol->in(1)->Opcode() == Op_CompareAndSwapL ) || diff --git a/src/share/vm/opto/macro.cpp b/src/share/vm/opto/macro.cpp index 7de433e3aaa4a05e405976591c204350faefa57a..892f50183103467b3e6305d9722d74c51ee30848 100644 --- a/src/share/vm/opto/macro.cpp +++ b/src/share/vm/opto/macro.cpp @@ -82,16 +82,31 @@ void PhaseMacroExpand::copy_call_debug_info(CallNode *oldcall, CallNode * newcal } } -Node* PhaseMacroExpand::opt_iff(Node* region, Node* iff) { - IfNode *opt_iff = transform_later(iff)->as_If(); +Node* PhaseMacroExpand::opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path) { + Node* cmp; + if (mask != 0) { + Node* and_node = transform_later(new (C, 3) AndXNode(word, MakeConX(mask))); + cmp = transform_later(new (C, 3) CmpXNode(and_node, MakeConX(bits))); + } else { + cmp = word; + } + Node* bol = transform_later(new (C, 2) BoolNode(cmp, BoolTest::ne)); + IfNode* iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN ); + transform_later(iff); - // Fast path taken; set region slot 2 - Node *fast_taken = transform_later( new (C, 1) IfFalseNode(opt_iff) ); - region->init_req(2,fast_taken); // Capture fast-control + // Fast path taken. + Node *fast_taken = transform_later( new (C, 1) IfFalseNode(iff) ); // Fast path not-taken, i.e. slow path - Node *slow_taken = transform_later( new (C, 1) IfTrueNode(opt_iff) ); - return slow_taken; + Node *slow_taken = transform_later( new (C, 1) IfTrueNode(iff) ); + + if (return_fast_path) { + region->init_req(edge, slow_taken); // Capture slow-control + return fast_taken; + } else { + region->init_req(edge, fast_taken); // Capture fast-control + return slow_taken; + } } //--------------------copy_predefined_input_for_runtime_call-------------------- @@ -854,7 +869,7 @@ void PhaseMacroExpand::set_eden_pointers(Node* &eden_top_adr, Node* &eden_end_ad Node* PhaseMacroExpand::make_load(Node* ctl, Node* mem, Node* base, int offset, const Type* value_type, BasicType bt) { Node* adr = basic_plus_adr(base, offset); - const TypePtr* adr_type = TypeRawPtr::BOTTOM; + const TypePtr* adr_type = adr->bottom_type()->is_ptr(); Node* value = LoadNode::make(_igvn, ctl, mem, adr, adr_type, value_type, bt); transform_later(value); return value; @@ -1583,12 +1598,194 @@ void PhaseMacroExpand::expand_lock_node(LockNode *lock) { Node* flock = lock->fastlock_node(); // Make the merge point - Node *region = new (C, 3) RegionNode(3); + Node *region; + Node *mem_phi; + Node *slow_path; + + if (UseOptoBiasInlining) { + /* + * See the full descrition in MacroAssembler::biased_locking_enter(). + * + * if( (mark_word & biased_lock_mask) == biased_lock_pattern ) { + * // The object is biased. + * proto_node = klass->prototype_header; + * o_node = thread | proto_node; + * x_node = o_node ^ mark_word; + * if( (x_node & ~age_mask) == 0 ) { // Biased to the current thread ? + * // Done. + * } else { + * if( (x_node & biased_lock_mask) != 0 ) { + * // The klass's prototype header is no longer biased. + * cas(&mark_word, mark_word, proto_node) + * goto cas_lock; + * } else { + * // The klass's prototype header is still biased. + * if( (x_node & epoch_mask) != 0 ) { // Expired epoch? + * old = mark_word; + * new = o_node; + * } else { + * // Different thread or anonymous biased. + * old = mark_word & (epoch_mask | age_mask | biased_lock_mask); + * new = thread | old; + * } + * // Try to rebias. + * if( cas(&mark_word, old, new) == 0 ) { + * // Done. + * } else { + * goto slow_path; // Failed. + * } + * } + * } + * } else { + * // The object is not biased. + * cas_lock: + * if( FastLock(obj) == 0 ) { + * // Done. + * } else { + * slow_path: + * OptoRuntime::complete_monitor_locking_Java(obj); + * } + * } + */ + + region = new (C, 5) RegionNode(5); + // create a Phi for the memory state + mem_phi = new (C, 5) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); + + Node* fast_lock_region = new (C, 3) RegionNode(3); + Node* fast_lock_mem_phi = new (C, 3) PhiNode( fast_lock_region, Type::MEMORY, TypeRawPtr::BOTTOM); + + // First, check mark word for the biased lock pattern. + Node* mark_node = make_load(ctrl, mem, obj, oopDesc::mark_offset_in_bytes(), TypeX_X, TypeX_X->basic_type()); + + // Get fast path - mark word has the biased lock pattern. + ctrl = opt_bits_test(ctrl, fast_lock_region, 1, mark_node, + markOopDesc::biased_lock_mask_in_place, + markOopDesc::biased_lock_pattern, true); + // fast_lock_region->in(1) is set to slow path. + fast_lock_mem_phi->init_req(1, mem); + + // Now check that the lock is biased to the current thread and has + // the same epoch and bias as Klass::_prototype_header. + + // Special-case a fresh allocation to avoid building nodes: + Node* klass_node = AllocateNode::Ideal_klass(obj, &_igvn); + if (klass_node == NULL) { + Node* k_adr = basic_plus_adr(obj, oopDesc::klass_offset_in_bytes()); + klass_node = transform_later( LoadKlassNode::make(_igvn, mem, k_adr, _igvn.type(k_adr)->is_ptr()) ); + klass_node->init_req(0, ctrl); + } + Node *proto_node = make_load(ctrl, mem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeX_X, TypeX_X->basic_type()); - Node *bol = transform_later(new (C, 2) BoolNode(flock,BoolTest::ne)); - Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN ); - // Optimize test; set region slot 2 - Node *slow_path = opt_iff(region,iff); + Node* thread = transform_later(new (C, 1) ThreadLocalNode()); + Node* cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread)); + Node* o_node = transform_later(new (C, 3) OrXNode(cast_thread, proto_node)); + Node* x_node = transform_later(new (C, 3) XorXNode(o_node, mark_node)); + + // Get slow path - mark word does NOT match the value. + Node* not_biased_ctrl = opt_bits_test(ctrl, region, 3, x_node, + (~markOopDesc::age_mask_in_place), 0); + // region->in(3) is set to fast path - the object is biased to the current thread. + mem_phi->init_req(3, mem); + + + // Mark word does NOT match the value (thread | Klass::_prototype_header). + + + // First, check biased pattern. + // Get fast path - _prototype_header has the same biased lock pattern. + ctrl = opt_bits_test(not_biased_ctrl, fast_lock_region, 2, x_node, + markOopDesc::biased_lock_mask_in_place, 0, true); + + not_biased_ctrl = fast_lock_region->in(2); // Slow path + // fast_lock_region->in(2) - the prototype header is no longer biased + // and we have to revoke the bias on this object. + // We are going to try to reset the mark of this object to the prototype + // value and fall through to the CAS-based locking scheme. + Node* adr = basic_plus_adr(obj, oopDesc::mark_offset_in_bytes()); + Node* cas = new (C, 5) StoreXConditionalNode(not_biased_ctrl, mem, adr, + proto_node, mark_node); + transform_later(cas); + Node* proj = transform_later( new (C, 1) SCMemProjNode(cas)); + fast_lock_mem_phi->init_req(2, proj); + + + // Second, check epoch bits. + Node* rebiased_region = new (C, 3) RegionNode(3); + Node* old_phi = new (C, 3) PhiNode( rebiased_region, TypeX_X); + Node* new_phi = new (C, 3) PhiNode( rebiased_region, TypeX_X); + + // Get slow path - mark word does NOT match epoch bits. + Node* epoch_ctrl = opt_bits_test(ctrl, rebiased_region, 1, x_node, + markOopDesc::epoch_mask_in_place, 0); + // The epoch of the current bias is not valid, attempt to rebias the object + // toward the current thread. + rebiased_region->init_req(2, epoch_ctrl); + old_phi->init_req(2, mark_node); + new_phi->init_req(2, o_node); + + // rebiased_region->in(1) is set to fast path. + // The epoch of the current bias is still valid but we know + // nothing about the owner; it might be set or it might be clear. + Node* cmask = MakeConX(markOopDesc::biased_lock_mask_in_place | + markOopDesc::age_mask_in_place | + markOopDesc::epoch_mask_in_place); + Node* old = transform_later(new (C, 3) AndXNode(mark_node, cmask)); + cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread)); + Node* new_mark = transform_later(new (C, 3) OrXNode(cast_thread, old)); + old_phi->init_req(1, old); + new_phi->init_req(1, new_mark); + + transform_later(rebiased_region); + transform_later(old_phi); + transform_later(new_phi); + + // Try to acquire the bias of the object using an atomic operation. + // If this fails we will go in to the runtime to revoke the object's bias. + cas = new (C, 5) StoreXConditionalNode(rebiased_region, mem, adr, + new_phi, old_phi); + transform_later(cas); + proj = transform_later( new (C, 1) SCMemProjNode(cas)); + + // Get slow path - Failed to CAS. + not_biased_ctrl = opt_bits_test(rebiased_region, region, 4, cas, 0, 0); + mem_phi->init_req(4, proj); + // region->in(4) is set to fast path - the object is rebiased to the current thread. + + // Failed to CAS. + slow_path = new (C, 3) RegionNode(3); + Node *slow_mem = new (C, 3) PhiNode( slow_path, Type::MEMORY, TypeRawPtr::BOTTOM); + + slow_path->init_req(1, not_biased_ctrl); // Capture slow-control + slow_mem->init_req(1, proj); + + // Call CAS-based locking scheme (FastLock node). + + transform_later(fast_lock_region); + transform_later(fast_lock_mem_phi); + + // Get slow path - FastLock failed to lock the object. + ctrl = opt_bits_test(fast_lock_region, region, 2, flock, 0, 0); + mem_phi->init_req(2, fast_lock_mem_phi); + // region->in(2) is set to fast path - the object is locked to the current thread. + + slow_path->init_req(2, ctrl); // Capture slow-control + slow_mem->init_req(2, fast_lock_mem_phi); + + transform_later(slow_path); + transform_later(slow_mem); + // Reset lock's memory edge. + lock->set_req(TypeFunc::Memory, slow_mem); + + } else { + region = new (C, 3) RegionNode(3); + // create a Phi for the memory state + mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); + + // Optimize test; set region slot 2 + slow_path = opt_bits_test(ctrl, region, 2, flock, 0, 0); + mem_phi->init_req(2, mem); + } // Make slow path call CallNode *call = make_slow_call( (CallNode *) lock, OptoRuntime::complete_monitor_enter_Type(), OptoRuntime::complete_monitor_locking_Java(), NULL, slow_path, obj, box ); @@ -1614,16 +1811,11 @@ void PhaseMacroExpand::expand_lock_node(LockNode *lock) { transform_later(region); _igvn.subsume_node(_fallthroughproj, region); - // create a Phi for the memory state - Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); - Node *memproj = transform_later( new (C, 1) ProjNode(call, TypeFunc::Memory) ); + Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) ); mem_phi->init_req(1, memproj ); - mem_phi->init_req(2, mem); transform_later(mem_phi); - _igvn.hash_delete(_memproj_fallthrough); + _igvn.hash_delete(_memproj_fallthrough); _igvn.subsume_node(_memproj_fallthrough, mem_phi); - - } //------------------------------expand_unlock_node---------------------- @@ -1637,14 +1829,31 @@ void PhaseMacroExpand::expand_unlock_node(UnlockNode *unlock) { // No need for a null check on unlock // Make the merge point - RegionNode *region = new (C, 3) RegionNode(3); + Node *region; + Node *mem_phi; + + if (UseOptoBiasInlining) { + // Check for biased locking unlock case, which is a no-op. + // See the full descrition in MacroAssembler::biased_locking_exit(). + region = new (C, 4) RegionNode(4); + // create a Phi for the memory state + mem_phi = new (C, 4) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); + mem_phi->init_req(3, mem); + + Node* mark_node = make_load(ctrl, mem, obj, oopDesc::mark_offset_in_bytes(), TypeX_X, TypeX_X->basic_type()); + ctrl = opt_bits_test(ctrl, region, 3, mark_node, + markOopDesc::biased_lock_mask_in_place, + markOopDesc::biased_lock_pattern); + } else { + region = new (C, 3) RegionNode(3); + // create a Phi for the memory state + mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); + } FastUnlockNode *funlock = new (C, 3) FastUnlockNode( ctrl, obj, box ); funlock = transform_later( funlock )->as_FastUnlock(); - Node *bol = transform_later(new (C, 2) BoolNode(funlock,BoolTest::ne)); - Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN ); // Optimize test; set region slot 2 - Node *slow_path = opt_iff(region,iff); + Node *slow_path = opt_bits_test(ctrl, region, 2, funlock, 0, 0); CallNode *call = make_slow_call( (CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), "complete_monitor_unlocking_C", slow_path, obj, box ); @@ -1666,16 +1875,12 @@ void PhaseMacroExpand::expand_unlock_node(UnlockNode *unlock) { transform_later(region); _igvn.subsume_node(_fallthroughproj, region); - // create a Phi for the memory state - Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) ); mem_phi->init_req(1, memproj ); mem_phi->init_req(2, mem); transform_later(mem_phi); - _igvn.hash_delete(_memproj_fallthrough); + _igvn.hash_delete(_memproj_fallthrough); _igvn.subsume_node(_memproj_fallthrough, mem_phi); - - } //------------------------------expand_macro_nodes---------------------- diff --git a/src/share/vm/opto/macro.hpp b/src/share/vm/opto/macro.hpp index 89ed2bd2f47632d6df67c868be64f870fb33a0c2..d5c28a461bb7bff2ce61cb441cd7cf28a08ed861 100644 --- a/src/share/vm/opto/macro.hpp +++ b/src/share/vm/opto/macro.hpp @@ -93,7 +93,7 @@ private: int replace_input(Node *use, Node *oldref, Node *newref); void copy_call_debug_info(CallNode *oldcall, CallNode * newcall); - Node* opt_iff(Node* region, Node* iff); + Node* opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path = false); void copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call); CallNode* make_slow_call(CallNode *oldcall, const TypeFunc* slow_call_type, address slow_call, const char* leaf_name, Node* slow_path, Node* parm0, Node* parm1); diff --git a/src/share/vm/opto/matcher.cpp b/src/share/vm/opto/matcher.cpp index 1fc7915b9d620074d5395ec91f05d28e2e108e59..582cf2740b9df2e081010f50d7f24d4dfd382ff1 100644 --- a/src/share/vm/opto/matcher.cpp +++ b/src/share/vm/opto/matcher.cpp @@ -1951,6 +1951,7 @@ void Matcher::find_shared( Node *n ) { // Now hack a few special opcodes switch( n->Opcode() ) { // Handle some opcodes special case Op_StorePConditional: + case Op_StoreIConditional: case Op_StoreLConditional: case Op_CompareAndSwapI: case Op_CompareAndSwapL: diff --git a/src/share/vm/opto/memnode.cpp b/src/share/vm/opto/memnode.cpp index ea7e062aef9b92ca186a52a2ebd4249e9cff52fd..4cfc0d233f3437e27d857584f8ea2362fad40e55 100644 --- a/src/share/vm/opto/memnode.cpp +++ b/src/share/vm/opto/memnode.cpp @@ -227,6 +227,14 @@ Node *MemNode::Ideal_common(PhaseGVN *phase, bool can_reshape) { const Type *t_adr = phase->type( address ); if( t_adr == Type::TOP ) return NodeSentinel; // caller will return NULL + PhaseIterGVN *igvn = phase->is_IterGVN(); + if( can_reshape && igvn != NULL && igvn->_worklist.member(address) ) { + // The address's base and type may change when the address is processed. + // Delay this mem node transformation until the address is processed. + phase->is_IterGVN()->_worklist.push(this); + return NodeSentinel; // caller will return NULL + } + // Avoid independent memory operations Node* old_mem = mem; diff --git a/src/share/vm/opto/memnode.hpp b/src/share/vm/opto/memnode.hpp index 2b40a676c4531a0ce6040a90db4937f2c9181045..e1a84febc448ad28f7923d20579150a37334709f 100644 --- a/src/share/vm/opto/memnode.hpp +++ b/src/share/vm/opto/memnode.hpp @@ -632,6 +632,17 @@ public: virtual uint ideal_reg() const { return Op_RegFlags; } }; +//------------------------------StoreIConditionalNode--------------------------- +// Conditionally store int to memory, if no change since prior +// load-locked. Sets flags for success or failure of the store. +class StoreIConditionalNode : public LoadStoreNode { +public: + StoreIConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ii ) : LoadStoreNode(c, mem, adr, val, ii) { } + virtual int Opcode() const; + // Produces flags + virtual uint ideal_reg() const { return Op_RegFlags; } +}; + //------------------------------StoreLConditionalNode--------------------------- // Conditionally store long to memory, if no change since prior // load-locked. Sets flags for success or failure of the store. @@ -639,6 +650,8 @@ class StoreLConditionalNode : public LoadStoreNode { public: StoreLConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreNode(c, mem, adr, val, ll) { } virtual int Opcode() const; + // Produces flags + virtual uint ideal_reg() const { return Op_RegFlags; } }; diff --git a/src/share/vm/opto/type.hpp b/src/share/vm/opto/type.hpp index 69bc06a738482ac226fbb2c6bfe582aeb8d36fa2..3d3f3ddd883f74aa132e0fb8aaae62b1ec53cbf3 100644 --- a/src/share/vm/opto/type.hpp +++ b/src/share/vm/opto/type.hpp @@ -1183,6 +1183,9 @@ inline bool Type::is_floatingpoint() const { #define RShiftXNode RShiftLNode // For card marks and hashcodes #define URShiftXNode URShiftLNode +// UseOptoBiasInlining +#define XorXNode XorLNode +#define StoreXConditionalNode StoreLConditionalNode // Opcodes #define Op_LShiftX Op_LShiftL #define Op_AndX Op_AndL @@ -1222,6 +1225,9 @@ inline bool Type::is_floatingpoint() const { #define RShiftXNode RShiftINode // For card marks and hashcodes #define URShiftXNode URShiftINode +// UseOptoBiasInlining +#define XorXNode XorINode +#define StoreXConditionalNode StoreIConditionalNode // Opcodes #define Op_LShiftX Op_LShiftI #define Op_AndX Op_AndI diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp index f46aa9c2fb5816c9d0f09e91b29a5cc4f159fc4e..fa2ba23383be4b04a1f7c48f3065f27328bfea57 100644 --- a/src/share/vm/runtime/arguments.cpp +++ b/src/share/vm/runtime/arguments.cpp @@ -2628,6 +2628,12 @@ jint Arguments::parse(const JavaVMInitArgs* args) { FLAG_SET_DEFAULT(UseBiasedLocking, false); #endif /* CC_INTERP */ +#ifdef COMPILER2 + if (!UseBiasedLocking || EmitSync != 0) { + UseOptoBiasInlining = false; + } +#endif + if (PrintCommandLineFlags) { CommandLineFlags::printSetFlags(); } diff --git a/src/share/vm/utilities/vmError.cpp b/src/share/vm/utilities/vmError.cpp index 71ab720f028c5faf0a19b5a7605a878c24a24cd8..a9d7a72f1cdcb2196c680fe7001f92b601ee7184 100644 --- a/src/share/vm/utilities/vmError.cpp +++ b/src/share/vm/utilities/vmError.cpp @@ -263,7 +263,7 @@ void VMError::report(outputStream* st) { st->print("# java.lang.OutOfMemoryError: "); if (_size) { st->print("requested "); - sprintf(buf,"%d",_size); + sprintf(buf,SIZE_FORMAT,_size); st->print(buf); st->print(" bytes"); if (_message != NULL) {