From 2a73bdabdd27d54c2025d6dcbc2c0459c7e63fbf Mon Sep 17 00:00:00 2001 From: johnc Date: Wed, 31 Aug 2011 10:16:02 -0700 Subject: [PATCH] 7066841: remove MacroAssembler::br_on_reg_cond() on sparc Summary: Remove the macro assembler routine br_on_reg_cond() and replace the remaining calls to that routine with an equivalent. Reviewed-by: kvn, iveresov --- src/cpu/sparc/vm/assembler_sparc.cpp | 132 ++++-------------- src/cpu/sparc/vm/assembler_sparc.hpp | 6 - src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp | 32 ++--- src/cpu/sparc/vm/c1_Runtime1_sparc.cpp | 33 +++-- .../vm/gc_implementation/g1/g1_globals.hpp | 3 - 5 files changed, 60 insertions(+), 146 deletions(-) diff --git a/src/cpu/sparc/vm/assembler_sparc.cpp b/src/cpu/sparc/vm/assembler_sparc.cpp index 4e5d90eaa..e70328bd9 100644 --- a/src/cpu/sparc/vm/assembler_sparc.cpp +++ b/src/cpu/sparc/vm/assembler_sparc.cpp @@ -2161,29 +2161,6 @@ void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) { #endif } -void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, - Register s1, address d, - relocInfo::relocType rt ) { - assert_not_delayed(); - if (VM_Version::v9_instructions_work()) { - bpr(rc, a, p, s1, d, rt); - } else { - tst(s1); - br(reg_cond_to_cc_cond(rc), a, p, d, rt); - } -} - -void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, - Register s1, Label& L ) { - assert_not_delayed(); - if (VM_Version::v9_instructions_work()) { - bpr(rc, a, p, s1, L); - } else { - tst(s1); - br(reg_cond_to_cc_cond(rc), a, p, L); - } -} - // Compare registers and branch with nop in delay slot or cbcond without delay slot. // Compare integer (32 bit) values (icc only). 
@@ -4340,22 +4317,29 @@ static void generate_satb_log_enqueue(bool with_frame) { } else { pre_val = O0; } + int satb_q_index_byte_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_index()); + int satb_q_buf_byte_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_buf()); + assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), "check sizes in assembly below"); __ bind(restart); + + // Load the index into the SATB buffer. PtrQueue::_index is a size_t + // so ld_ptr is appropriate. __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0); - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); - // If the branch is taken, no harm in executing this in the delay slot. - __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); + // index == 0? + __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill); + + __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); __ sub(L0, oopSize, L0); __ st_ptr(pre_val, L1, L0); // [_buf + index] := I0 @@ -4466,9 +4450,8 @@ void MacroAssembler::g1_write_barrier_pre(Register obj, tmp); } - // Check on whether to annul. - br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); - delayed()->nop(); + // Is marking active? + cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); // Do we need to load the previous value? if (obj != noreg) { @@ -4490,9 +4473,7 @@ void MacroAssembler::g1_write_barrier_pre(Register obj, assert(pre_val != noreg, "must have a real register"); // Is the previous value null? - // Check on whether to annul. - br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); - delayed()->nop(); + cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered); // OK, it's not filtered, so we'll need to call enqueue. 
In the normal // case, pre_val will be a scratch G-reg, but there are some cases in @@ -4519,39 +4500,6 @@ void MacroAssembler::g1_write_barrier_pre(Register obj, bind(filtered); } -static jint num_ct_writes = 0; -static jint num_ct_writes_filtered_in_hr = 0; -static jint num_ct_writes_filtered_null = 0; -static G1CollectedHeap* g1 = NULL; - -static Thread* count_ct_writes(void* filter_val, void* new_val) { - Atomic::inc(&num_ct_writes); - if (filter_val == NULL) { - Atomic::inc(&num_ct_writes_filtered_in_hr); - } else if (new_val == NULL) { - Atomic::inc(&num_ct_writes_filtered_null); - } else { - if (g1 == NULL) { - g1 = G1CollectedHeap::heap(); - } - } - if ((num_ct_writes % 1000000) == 0) { - jint num_ct_writes_filtered = - num_ct_writes_filtered_in_hr + - num_ct_writes_filtered_null; - - tty->print_cr("%d potential CT writes: %5.2f%% filtered\n" - " (%5.2f%% intra-HR, %5.2f%% null).", - num_ct_writes, - 100.0*(float)num_ct_writes_filtered/(float)num_ct_writes, - 100.0*(float)num_ct_writes_filtered_in_hr/ - (float)num_ct_writes, - 100.0*(float)num_ct_writes_filtered_null/ - (float)num_ct_writes); - } - return Thread::current(); -} - static address dirty_card_log_enqueue = 0; static u_char* dirty_card_log_enqueue_end = 0; @@ -4574,11 +4522,8 @@ static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { __ set(addrlit, O1); // O1 := __ ldub(O0, O1, O2); // O2 := [O0 + O1] - __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, - O2, not_already_dirty); - // Get O1 + O2 into a reg by itself -- useful in the take-the-branch - // case, harmless if not. - __ delayed()->add(O0, O1, O3); + assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code"); + __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty); // We didn't take the branch, so we're already dirty: return. // Use return-from-leaf @@ -4587,8 +4532,13 @@ static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { // Not dirty. 
__ bind(not_already_dirty); + + // Get O0 + O1 into a reg by itself + __ add(O0, O1, O3); + // First, dirty it. __ stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). + int dirty_card_q_index_byte_offset = in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index()); @@ -4596,12 +4546,15 @@ static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_buf()); __ bind(restart); + + // Load the index into the update buffer. PtrQueue::_index is + // a size_t so ld_ptr is appropriate here. __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, - L0, refill); - // If the branch is taken, no harm in executing this in the delay slot. - __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); + // index == 0? + __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill); + + __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); __ sub(L0, oopSize, L0); __ st_ptr(O3, L1, L0); // [_buf + index] := I0 @@ -4664,6 +4617,7 @@ void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); assert(bs->kind() == BarrierSet::G1SATBCT || bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); + if (G1RSBarrierRegionFilter) { xor3(store_addr, new_val, tmp); #ifdef _LP64 @@ -4672,33 +4626,8 @@ void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp); #endif - if (G1PrintCTFilterStats) { - guarantee(tmp->is_global(), "Or stats won't work..."); - // This is a sleazy hack: I'm temporarily hijacking G2, which I - // promise to restore. - mov(new_val, G2); - save_frame(0); - mov(tmp, O0); - mov(G2, O1); - // Save G-regs that target may use. 
- mov(G1, L1); - mov(G2, L2); - mov(G3, L3); - mov(G4, L4); - mov(G5, L5); - call(CAST_FROM_FN_PTR(address, &count_ct_writes)); - delayed()->nop(); - mov(O0, G2); - // Restore G-regs that target may have used. - mov(L1, G1); - mov(L3, G3); - mov(L4, G4); - mov(L5, G5); - restore(G0, G0, G0); - } - // XXX Should I predict this taken or not? Does it mattern? - br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); - delayed()->nop(); + // XXX Should I predict this taken or not? Does it matter? + cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); } // If the "store_addr" register is an "in" or "local" register, move it to @@ -4723,7 +4652,6 @@ void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val restore(); bind(filtered); - } #endif // SERIALGC diff --git a/src/cpu/sparc/vm/assembler_sparc.hpp b/src/cpu/sparc/vm/assembler_sparc.hpp index 4462ee5d0..0b3c258ab 100644 --- a/src/cpu/sparc/vm/assembler_sparc.hpp +++ b/src/cpu/sparc/vm/assembler_sparc.hpp @@ -1940,12 +1940,6 @@ class MacroAssembler: public Assembler { void br_null ( Register s1, bool a, Predict p, Label& L ); void br_notnull( Register s1, bool a, Predict p, Label& L ); - // These versions will do the most efficient thing on v8 and v9. Perhaps - // this is what the routine above was meant to do, but it didn't (and - // didn't cover both target address kinds.) - void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none ); - void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L); - // // Compare registers and branch with nop in delay slot or cbcond without delay slot. 
// diff --git a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp index 9124e8eaf..e15515304 100644 --- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp +++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp @@ -421,8 +421,7 @@ void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { } if (__ is_in_wdisp16_range(_continuation)) { - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, - pre_val_reg, _continuation); + __ br_null(pre_val_reg, /*annul*/false, Assembler::pt, _continuation); } else { __ cmp(pre_val_reg, G0); __ brx(Assembler::equal, false, Assembler::pn, _continuation); @@ -458,8 +457,7 @@ void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) { // The original src operand was not a constant. // Generate src == null? if (__ is_in_wdisp16_range(_continuation)) { - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, - src_reg, _continuation); + __ br_null(src_reg, /*annul*/false, Assembler::pt, _continuation); } else { __ cmp(src_reg, G0); __ brx(Assembler::equal, false, Assembler::pt, _continuation); @@ -476,13 +474,9 @@ void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) { Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc)); __ ld(ref_type_adr, tmp_reg); - if (__ is_in_wdisp16_range(_continuation)) { - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, - tmp_reg, _continuation); - } else { - __ cmp(tmp_reg, G0); - __ brx(Assembler::equal, false, Assembler::pt, _continuation); - } + // _reference_type field is of type ReferenceType (enum) + assert(REF_NONE == 0, "check this code"); + __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt); __ delayed()->nop(); // Is marking active? 
@@ -498,13 +492,8 @@ void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) { assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); __ ldsb(in_progress, tmp_reg); } - if (__ is_in_wdisp16_range(_continuation)) { - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, - tmp_reg, _continuation); - } else { - __ cmp(tmp_reg, G0); - __ brx(Assembler::equal, false, Assembler::pt, _continuation); - } + + __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt); __ delayed()->nop(); // val == null? @@ -512,8 +501,7 @@ void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) { Register val_reg = val()->as_register(); if (__ is_in_wdisp16_range(_continuation)) { - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, - val_reg, _continuation); + __ br_null(val_reg, /*annul*/false, Assembler::pt, _continuation); } else { __ cmp(val_reg, G0); __ brx(Assembler::equal, false, Assembler::pt, _continuation); @@ -542,9 +530,9 @@ void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { assert(new_val()->is_register(), "Precondition."); Register addr_reg = addr()->as_pointer_register(); Register new_val_reg = new_val()->as_register(); + if (__ is_in_wdisp16_range(_continuation)) { - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, - new_val_reg, _continuation); + __ br_null(new_val_reg, /*annul*/false, Assembler::pt, _continuation); } else { __ cmp(new_val_reg, G0); __ brx(Assembler::equal, false, Assembler::pn, _continuation); diff --git a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp index f95756b1f..f2cfbdc1b 100644 --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp @@ -834,14 +834,16 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { int satb_q_buf_byte_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + // Load the 
index into the SATB buffer. PtrQueue::_index is a + // size_t so ld_ptr is appropriate __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp); - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, - Assembler::pn, tmp, refill); + // index == 0? + __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pn, refill); - // If the branch is taken, no harm in executing this in the delay slot. - __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2); + __ ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2); __ sub(tmp, oopSize, tmp); __ st_ptr(pre_val, tmp2, tmp); // [_buf + index] := @@ -901,11 +903,8 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { __ set(rs, cardtable); // cardtable := __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable] - __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, - tmp, not_already_dirty); - // Get cardtable + tmp into a reg by itself -- useful in the take-the-branch - // case, harmless if not. - __ delayed()->add(addr, cardtable, tmp2); + assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code"); + __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty); // We didn't take the branch, so we're already dirty: return. // Use return-from-leaf @@ -914,6 +913,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { // Not dirty. __ bind(not_already_dirty); + + // Get addr + cardtable into a reg by itself + __ add(addr, cardtable, tmp2); + // First, dirty it. __ stb(G0, tmp2, 0); // [cardPtr] := 0 (i.e., dirty). @@ -929,13 +932,17 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { int dirty_card_q_buf_byte_offset = in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + + // Get the index into the update buffer. PtrQueue::_index is + // a size_t so ld_ptr is appropriate here.
__ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3); - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, - tmp3, refill); - // If the branch is taken, no harm in executing this in the delay slot. - __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4); + // index == 0? + __ cmp_and_brx_short(tmp3, G0, Assembler::equal, Assembler::pn, refill); + + __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4); __ sub(tmp3, oopSize, tmp3); __ st_ptr(tmp2, tmp4, tmp3); // [_buf + index] := diff --git a/src/share/vm/gc_implementation/g1/g1_globals.hpp b/src/share/vm/gc_implementation/g1/g1_globals.hpp index 0daabdb5d..94f0ada69 100644 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp @@ -124,9 +124,6 @@ develop(bool, G1RSBarrierNullFilter, true, \ "If true, generate null-pointer filtering code in RS barrier") \ \ - develop(bool, G1PrintCTFilterStats, false, \ - "If true, print stats on RS filtering effectiveness") \ - \ develop(bool, G1DeferredRSUpdate, true, \ "If true, use deferred RS updates") \ \ -- GitLab