From adef30f613ff25b3c6ab59240c9fee49da9a99e0 Mon Sep 17 00:00:00 2001 From: iveresov Date: Fri, 6 Mar 2009 13:50:14 -0800 Subject: [PATCH] 6720309: G1: don't synchronously update RSet during evacuation pauses 6720334: G1: don't update RSets of collection set regions during an evacuation pause Summary: Introduced a deferred update mechanism for delaying the rset updates during the collection pause Reviewed-by: apetrusenko, tonyp --- .../g1/concurrentG1RefineThread.cpp | 4 +- .../gc_implementation/g1/dirtyCardQueue.cpp | 4 +- .../gc_implementation/g1/dirtyCardQueue.hpp | 2 +- .../gc_implementation/g1/g1CollectedHeap.cpp | 136 ++++++++++++++---- .../gc_implementation/g1/g1CollectedHeap.hpp | 7 + .../vm/gc_implementation/g1/g1RemSet.cpp | 115 +++++++++++---- .../vm/gc_implementation/g1/g1RemSet.hpp | 1 + .../gc_implementation/g1/g1RemSet.inline.hpp | 42 +++--- .../vm/gc_implementation/g1/g1_globals.hpp | 3 + .../vm/gc_implementation/g1/ptrQueue.cpp | 77 ++++++++-- .../vm/gc_implementation/g1/ptrQueue.hpp | 13 +- src/share/vm/memory/cardTableModRefBS.cpp | 52 ++++++- src/share/vm/memory/cardTableModRefBS.hpp | 26 +++- 13 files changed, 380 insertions(+), 102 deletions(-) diff --git a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp index 110c08327..b099908a7 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp @@ -133,14 +133,12 @@ void ConcurrentG1RefineThread::queueBasedRefinement() { _co_tracker.update(false); if (G1SmoothConcRefine) { - start_vtime_sec = os::elapsedVTime(); prev_buffer_num = curr_buffer_num; - _sts.leave(); os::sleep(Thread::current(), (jlong) _interval_ms, false); _sts.join(); + start_vtime_sec = os::elapsedVTime(); } - n_logs++; } // Make sure we harvest the PYA, if any. diff --git a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp index f3934812d..ec26c6a46 100644 --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp @@ -78,8 +78,8 @@ size_t DirtyCardQueueSet::num_par_ids() { void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, int max_completed_queue, - Mutex* lock) { - PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue); + Mutex* lock, PtrQueueSet* fl_owner) { + PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner); set_buffer_size(DCQBarrierQueueBufferSize); set_process_completed_threshold(DCQBarrierProcessCompletedThreshold); diff --git a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp index 86876fd94..7a6f3f27b 100644 --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp @@ -88,7 +88,7 @@ public: void initialize(Monitor* cbl_mon, Mutex* fl_lock, int max_completed_queue = 0, - Mutex* lock = NULL); + Mutex* lock = NULL, PtrQueueSet* fl_owner = NULL); // The number of parallel ids that can be claimed to allow collector or // mutator threads to do card-processing work. diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index 8d7b5e188..efc42b440 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -136,6 +136,14 @@ public: int calls() { return _calls; } }; +class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure { +public: + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + *card_ptr = CardTableModRefBS::dirty_card_val(); + return true; + } +}; + YoungList::YoungList(G1CollectedHeap* g1h) : _g1h(g1h), _head(NULL), _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL), @@ -961,7 +969,8 @@ void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs, // dirtied, so this should abandon those logs, and set "do_traversal" // to true. concurrent_g1_refine()->set_pya_restart(); - + assert(!G1DeferredRSUpdate + || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any"); assert(regions_accounted_for(), "Region leakage!"); } @@ -1466,6 +1475,13 @@ jint G1CollectedHeap::initialize() { G1DirtyCardQueueMax, Shared_DirtyCardQ_lock); } + if (G1DeferredRSUpdate) { + dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, + DirtyCardQ_FL_lock, + 0, + Shared_DirtyCardQ_lock, + &JavaThread::dirty_card_queue_set()); + } // In case we're keeping closure specialization stats, initialize those // counts and that mechanism. SpecializationStats::clear(); @@ -2918,27 +2934,51 @@ public: } }; -class RecreateRSetEntriesClosure: public OopClosure { +class UpdateRSetImmediate : public OopsInHeapRegionClosure { private: G1CollectedHeap* _g1; G1RemSet* _g1_rem_set; - HeapRegion* _from; public: - RecreateRSetEntriesClosure(G1CollectedHeap* g1, HeapRegion* from) : - _g1(g1), _g1_rem_set(g1->g1_rem_set()), _from(from) - {} + UpdateRSetImmediate(G1CollectedHeap* g1) : + _g1(g1), _g1_rem_set(g1->g1_rem_set()) {} + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + assert(_from->is_in_reserved(p), "paranoia"); + if (*p != NULL && !_from->is_survivor()) { + _g1_rem_set->par_write_ref(_from, p, 0); + } + } +}; + +class UpdateRSetDeferred : public OopsInHeapRegionClosure { +private: + G1CollectedHeap* _g1; + DirtyCardQueue *_dcq; + CardTableModRefBS* _ct_bs; + +public: + UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) : + _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {} void do_oop(narrowOop* p) { guarantee(false, "NYI"); } void do_oop(oop* p) { assert(_from->is_in_reserved(p), "paranoia"); - if (*p != NULL) { - _g1_rem_set->write_ref(_from, p); + if (!_from->is_in_reserved(*p) && !_from->is_survivor()) { + size_t card_index = _ct_bs->index_for(p); + if (_ct_bs->mark_card_deferred(card_index)) { + _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index)); + } } } }; + + class RemoveSelfPointerClosure: public ObjectClosure { private: G1CollectedHeap* _g1; @@ -2946,11 +2986,11 @@ private: HeapRegion* _hr; size_t _prev_marked_bytes; size_t _next_marked_bytes; + OopsInHeapRegionClosure *_cl; public: - RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr) : - _g1(g1), _cm(_g1->concurrent_mark()), _hr(hr), - _prev_marked_bytes(0), _next_marked_bytes(0) - {} + RemoveSelfPointerClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* cl) : + _g1(g1), _cm(_g1->concurrent_mark()), _prev_marked_bytes(0), + _next_marked_bytes(0), _cl(cl) {} size_t prev_marked_bytes() { return _prev_marked_bytes; } size_t next_marked_bytes() { return _next_marked_bytes; } @@ -2988,8 +3028,7 @@ public: // that, if evacuation fails, we might have remembered set // entries missing given that we skipped cards on the // collection set. So, we'll recreate such entries now. - RecreateRSetEntriesClosure cl(_g1, _hr); - obj->oop_iterate(&cl); + obj->oop_iterate(_cl); assert(_cm->isPrevMarked(obj), "Should be marked!"); } else { // The object has been either evacuated or is dead. Fill it with a @@ -3002,14 +3041,23 @@ public: }; void G1CollectedHeap::remove_self_forwarding_pointers() { + UpdateRSetImmediate immediate_update(_g1h); + DirtyCardQueue dcq(&_g1h->dirty_card_queue_set()); + UpdateRSetDeferred deferred_update(_g1h, &dcq); + OopsInHeapRegionClosure *cl; + if (G1DeferredRSUpdate) { + cl = &deferred_update; + } else { + cl = &immediate_update; + } HeapRegion* cur = g1_policy()->collection_set(); - while (cur != NULL) { assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!"); + RemoveSelfPointerClosure rspc(_g1h, cl); if (cur->evacuation_failed()) { - RemoveSelfPointerClosure rspc(_g1h, cur); assert(cur->in_collection_set(), "bad CS"); + cl->set_region(cur); cur->object_iterate(&rspc); // A number of manipulations to make the TAMS be the current top, @@ -3518,6 +3566,9 @@ class G1ParScanThreadState : public StackObj { protected: G1CollectedHeap* _g1h; RefToScanQueue* _refs; + DirtyCardQueue _dcq; + CardTableModRefBS* _ct_bs; + G1RemSet* _g1_rem; typedef GrowableArray OverflowQueue; OverflowQueue* _overflowed_refs; @@ -3559,10 +3610,32 @@ protected: void add_to_undo_waste(size_t waste) { _undo_waste += waste; } + DirtyCardQueue& dirty_card_queue() { return _dcq; } + CardTableModRefBS* ctbs() { return _ct_bs; } + + void immediate_rs_update(HeapRegion* from, oop* p, int tid) { + _g1_rem->par_write_ref(from, p, tid); + } + + void deferred_rs_update(HeapRegion* from, oop* p, int tid) { + // If the new value of the field points to the same region or + // is the to-space, we don't need to include it in the Rset updates. + if (!from->is_in_reserved(*p) && !from->is_survivor()) { + size_t card_index = ctbs()->index_for(p); + // If the card hasn't been added to the buffer, do it. + if (ctbs()->mark_card_deferred(card_index)) { + dirty_card_queue().enqueue((jbyte*)ctbs()->byte_for_index(card_index)); + } + } + } + public: G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num) : _g1h(g1h), _refs(g1h->task_queue(queue_num)), + _dcq(&g1h->dirty_card_queue_set()), + _ct_bs((CardTableModRefBS*)_g1h->barrier_set()), + _g1_rem(g1h->g1_rem_set()), _hash_seed(17), _queue_num(queue_num), _term_attempts(0), _age_table(false), @@ -3640,6 +3713,14 @@ public: int refs_to_scan() { return refs()->size(); } int overflowed_refs_to_scan() { return overflowed_refs()->length(); } + void update_rs(HeapRegion* from, oop* p, int tid) { + if (G1DeferredRSUpdate) { + deferred_rs_update(from, p, tid); + } else { + immediate_rs_update(from, p, tid); + } + } + HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) { HeapWord* obj = NULL; @@ -3808,7 +3889,6 @@ public: } }; - G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()), _par_scan_state(par_scan_state) { } @@ -3834,7 +3914,7 @@ void G1ParScanClosure::do_oop_nv(oop* p) { assert(obj == *p, "the value of *p should not have changed"); _par_scan_state->push_on_queue(p); } else { - _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } } } @@ -3972,13 +4052,13 @@ void G1ParCopyClosurepar_write_ref(_from, p, _par_scan_state->queue_num()); + _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } } // When scanning moved objs, must look at all oops. if (barrier == G1BarrierEvac && obj != NULL) { - _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } if (do_gen_barrier && obj != NULL) { @@ -4127,6 +4207,7 @@ public: G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss); G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss); G1ParScanHeapRSClosure only_scan_heap_rs_cl(_g1h, &pss); + G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss); G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss); G1ParScanAndMarkHeapRSClosure scan_mark_heap_rs_cl(_g1h, &pss); @@ -4382,7 +4463,6 @@ void G1CollectedHeap::evacuate_collection_set() { g1_rem_set()->prepare_for_oops_into_collection_set_do(); concurrent_g1_refine()->set_use_cache(false); int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); - set_par_threads(n_workers); G1ParTask g1_par_task(this, n_workers, _task_queues); @@ -4390,8 +4470,9 @@ void G1CollectedHeap::evacuate_collection_set() { change_strong_roots_parity(); // In preparation for parallel strong roots. rem_set()->prepare_for_younger_refs_iterate(true); - double start_par = os::elapsedTime(); + assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty"); + double start_par = os::elapsedTime(); if (ParallelGCThreads > 0) { // The individual threads will set their evac-failure closures. workers()->run_task(&g1_par_task); @@ -4411,8 +4492,8 @@ void G1CollectedHeap::evacuate_collection_set() { G1KeepAliveClosure keep_alive(this); JNIHandles::weak_oops_do(&is_alive, &keep_alive); } - g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + concurrent_g1_refine()->set_use_cache(true); finalize_for_evac_failure(); @@ -4423,7 +4504,6 @@ void G1CollectedHeap::evacuate_collection_set() { if (evacuation_failed()) { remove_self_forwarding_pointers(); - if (PrintGCDetails) { gclog_or_tty->print(" (evacuation failed)"); } else if (PrintGC) { @@ -4431,6 +4511,14 @@ void G1CollectedHeap::evacuate_collection_set() { } } + if (G1DeferredRSUpdate) { + RedirtyLoggedCardTableEntryFastClosure redirty; + dirty_card_queue_set().set_closure(&redirty); + dirty_card_queue_set().apply_closure_to_all_completed_buffers(); + JavaThread::dirty_card_queue_set().merge_bufferlists(&dirty_card_queue_set()); + assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); + } + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); } diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp index 2e139b7f2..d5637ee58 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp @@ -457,6 +457,10 @@ protected: // And it's mod ref barrier set, used to track updates for the above. ModRefBarrierSet* _mr_bs; + // A set of cards that cover the objects for which the Rsets should be updated + // concurrently after the collection. + DirtyCardQueueSet _dirty_card_queue_set; + // The Heap Region Rem Set Iterator. HeapRegionRemSetIterator** _rem_set_iterator; @@ -666,6 +670,9 @@ public: RefToScanQueue *task_queue(int i); + // A set of cards where updates happened during the GC + DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; } + // Create a G1CollectedHeap with the specified policy. // Must call the initialize method afterwards. // May not return if something goes wrong. diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/src/share/vm/gc_implementation/g1/g1RemSet.cpp index 7946c41ff..bb2dfd7a6 100644 --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp @@ -177,11 +177,19 @@ HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) _cards_scanned(NULL), _total_cards_scanned(0) { _seq_task = new SubTasksDone(NumSeqTasks); - _new_refs = NEW_C_HEAP_ARRAY(GrowableArray*, ParallelGCThreads); + guarantee(n_workers() > 0, "There should be some workers"); + _new_refs = NEW_C_HEAP_ARRAY(GrowableArray*, n_workers()); + for (uint i = 0; i < n_workers(); i++) { + _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray(8192,true); + } } HRInto_G1RemSet::~HRInto_G1RemSet() { delete _seq_task; + for (uint i = 0; i < n_workers(); i++) { + delete _new_refs[i]; + } + FREE_C_HEAP_ARRAY(GrowableArray*, _new_refs); } void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) { @@ -281,8 +289,9 @@ public: if (!_ct_bs->is_card_claimed(card_index) && !_ct_bs->is_card_dirty(card_index)) { assert(_ct_bs->is_card_clean(card_index) || - _ct_bs->is_card_claimed(card_index), - "Card is either dirty, clean, or claimed"); + _ct_bs->is_card_claimed(card_index) || + _ct_bs->is_card_deferred(card_index), + "Card is either clean, claimed or deferred"); if (_ct_bs->claim_card(card_index)) scanCard(card_index, card_region); } @@ -338,14 +347,12 @@ void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) { _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0); _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); - if (ParallelGCThreads > 0) { - // In this case, we called scanNewRefsRS and recorded the corresponding - // time. - double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i); - if (scan_new_refs_time_ms > 0.0) { - closure_app_time_ms += scan_new_refs_time_ms; - } + + double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i); + if (scan_new_refs_time_ms > 0.0) { + closure_app_time_ms += scan_new_refs_time_ms; } + _g1p->record_obj_copy_time(worker_i, closure_app_time_ms); } @@ -469,8 +476,8 @@ HRInto_G1RemSet::scanNewRefsRS(OopsInHeapRegionClosure* oc, double scan_new_refs_start_sec = os::elapsedTime(); G1CollectedHeap* g1h = G1CollectedHeap::heap(); CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set()); - while (_new_refs[worker_i]->is_nonempty()) { - oop* p = _new_refs[worker_i]->pop(); + for (int i = 0; i < _new_refs[worker_i]->length(); i++) { + oop* p = _new_refs[worker_i]->at(i); oop obj = *p; // *p was in the collection set when p was pushed on "_new_refs", but // another thread may have processed this location from an RS, so it @@ -480,10 +487,6 @@ HRInto_G1RemSet::scanNewRefsRS(OopsInHeapRegionClosure* oc, HeapRegion* r = g1h->heap_region_containing(p); DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj)); - assert(ParallelGCThreads > 1 - || to->rem_set()->contains_reference(p), - "Invariant: pushed after being added." - "(Not reliable in parallel code.)"); oc->set_region(r); // If "p" has already been processed concurrently, this is // idempotent. @@ -538,8 +541,8 @@ HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, } } else { assert(worker_i == 0, "invariant"); - updateRS(0); + scanNewRefsRS(oc, 0); scanRS(oc, 0); } } @@ -559,11 +562,7 @@ prepare_for_oops_into_collection_set_do() { assert(!_par_traversal_in_progress, "Invariant between iterations."); if (ParallelGCThreads > 0) { set_par_traversal(true); - int n_workers = _g1->workers()->total_workers(); - _seq_task->set_par_threads(n_workers); - for (uint i = 0; i < ParallelGCThreads; i++) - _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray(8192,true); - + _seq_task->set_par_threads((int)n_workers()); if (cg1r->do_traversal()) { updateRS(0); // Have to do this again after updaters @@ -587,6 +586,53 @@ class cleanUpIteratorsClosure : public HeapRegionClosure { } }; +class UpdateRSetOopsIntoCSImmediate : public OopClosure { + G1CollectedHeap* _g1; +public: + UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { } + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + HeapRegion* to = _g1->heap_region_containing(*p); + if (to->in_collection_set()) { + if (to->rem_set()->add_reference(p, 0)) { + _g1->schedule_popular_region_evac(to); + } + } + } +}; + +class UpdateRSetOopsIntoCSDeferred : public OopClosure { + G1CollectedHeap* _g1; + CardTableModRefBS* _ct_bs; + DirtyCardQueue* _dcq; +public: + UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) : + _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { } + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + oop obj = *p; + if (_g1->obj_in_cs(obj)) { + size_t card_index = _ct_bs->index_for(p); + if (_ct_bs->mark_card_deferred(card_index)) { + _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index)); + } + } + } +}; + +void HRInto_G1RemSet::new_refs_iterate(OopClosure* cl) { + for (size_t i = 0; i < n_workers(); i++) { + for (int j = 0; j < _new_refs[i]->length(); j++) { + oop* p = _new_refs[i]->at(j); + cl->do_oop(p); + } + } +} + void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { guarantee( _cards_scanned != NULL, "invariant" ); _total_cards_scanned = 0; @@ -609,11 +655,25 @@ void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { if (cg1r->do_traversal()) { cg1r->cg1rThread()->set_do_traversal(false); } - for (uint i = 0; i < ParallelGCThreads; i++) { - delete _new_refs[i]; - } set_par_traversal(false); } + + if (_g1->evacuation_failed()) { + // Restore remembered sets for the regions pointing into + // the collection set. + if (G1DeferredRSUpdate) { + DirtyCardQueue dcq(&_g1->dirty_card_queue_set()); + UpdateRSetOopsIntoCSDeferred deferred_update(_g1, &dcq); + new_refs_iterate(&deferred_update); + } else { + UpdateRSetOopsIntoCSImmediate immediate_update(_g1); + new_refs_iterate(&immediate_update); + } + } + for (uint i = 0; i < n_workers(); i++) { + _new_refs[i]->clear(); + } + assert(!_par_traversal_in_progress, "Invariant between iterations."); } @@ -683,7 +743,8 @@ public: bool doHeapRegion(HeapRegion* r) { if (!r->in_collection_set() && !r->continuesHumongous() && - !r->is_young()) { + !r->is_young() && + !r->is_survivor()) { _update_rs_oop_cl.set_from(r); UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl); @@ -820,7 +881,7 @@ void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { // before all the cards on the region are dirtied. This is unlikely, // and it doesn't happen often, but it can happen. So, the extra // check below filters out those cards. - if (r->is_young()) { + if (r->is_young() || r->is_survivor()) { return; } // While we are processing RSet buffers during the collection, we diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.hpp b/src/share/vm/gc_implementation/g1/g1RemSet.hpp index 9f03d394d..e080d3771 100644 --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp @@ -155,6 +155,7 @@ protected: bool _par_traversal_in_progress; void set_par_traversal(bool b); GrowableArray** _new_refs; + void new_refs_iterate(OopClosure* cl); public: // This is called to reset dual hash tables after the gc pause diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp index e3f1b5cc8..3807b9ae4 100644 --- a/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp +++ b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp @@ -31,24 +31,7 @@ inline size_t G1RemSet::n_workers() { } inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) { - oop obj = *p; - assert(from != NULL && from->is_in_reserved(p), - "p is not in a from"); - HeapRegion* to = _g1->heap_region_containing(obj); - if (from != to && to != NULL) { - if (!to->popular() && !from->is_survivor()) { -#if G1_REM_SET_LOGGING - gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" - " for region [" PTR_FORMAT ", " PTR_FORMAT ")", - p, obj, - to->bottom(), to->end()); -#endif - assert(to->rem_set() != NULL, "Need per-region 'into' remsets."); - if (to->rem_set()->add_reference(p)) { - _g1->schedule_popular_region_evac(to); - } - } - } + par_write_ref(from, p, 0); } inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) { @@ -82,7 +65,22 @@ inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) { HeapRegion* to = _g1->heap_region_containing(obj); // The test below could be optimized by applying a bit op to to and from. if (to != NULL && from != NULL && from != to) { - if (!to->popular() && !from->is_survivor()) { + bool update_delayed = false; + // There is a tricky infinite loop if we keep pushing + // self forwarding pointers onto our _new_refs list. + // The _par_traversal_in_progress flag is true during the collection pause, + // false during the evacuation failure handing. + if (_par_traversal_in_progress && + to->in_collection_set() && !self_forwarded(obj)) { + _new_refs[tid]->push(p); + // Deferred updates to the Cset are either discarded (in the normal case), + // or processed (if an evacuation failure occurs) at the end + // of the collection. + // See HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do(). + update_delayed = true; + } + + if (!to->popular() && !update_delayed) { #if G1_REM_SET_LOGGING gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" " for region [" PTR_FORMAT ", " PTR_FORMAT ")", @@ -94,11 +92,5 @@ inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) { _g1->schedule_popular_region_evac(to); } } - // There is a tricky infinite loop if we keep pushing - // self forwarding pointers onto our _new_refs list. - if (_par_traversal_in_progress && - to->in_collection_set() && !self_forwarded(obj)) { - _new_refs[tid]->push(p); - } } } diff --git a/src/share/vm/gc_implementation/g1/g1_globals.hpp b/src/share/vm/gc_implementation/g1/g1_globals.hpp index 43b1d59c8..fd23f65d1 100644 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp @@ -172,6 +172,9 @@ develop(bool, G1RSBarrierUseQueue, true, \ "If true, use queueing RS barrier") \ \ + develop(bool, G1DeferredRSUpdate, true, \ + "If true, use deferred RS updates") \ + \ develop(bool, G1RSLogCheckCardTable, false, \ "If true, verify that no dirty cards remain after RS log " \ "processing.") \ diff --git a/src/share/vm/gc_implementation/g1/ptrQueue.cpp b/src/share/vm/gc_implementation/g1/ptrQueue.cpp index c6a4faad9..1da8d5206 100644 --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp @@ -91,15 +91,17 @@ PtrQueueSet::PtrQueueSet(bool notify_when_complete) : _n_completed_buffers(0), _process_completed_threshold(0), _process_completed(false), _buf_free_list(NULL), _buf_free_list_sz(0) -{} +{ + _fl_owner = this; +} void** PtrQueueSet::allocate_buffer() { assert(_sz > 0, "Didn't set a buffer size."); - MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); - if (_buf_free_list != NULL) { - void** res = _buf_free_list; - _buf_free_list = (void**)_buf_free_list[0]; - _buf_free_list_sz--; + MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); + if (_fl_owner->_buf_free_list != NULL) { + void** res = _fl_owner->_buf_free_list; + _fl_owner->_buf_free_list = (void**)_fl_owner->_buf_free_list[0]; + _fl_owner->_buf_free_list_sz--; // Just override the next pointer with NULL, just in case we scan this part // of the buffer. res[0] = NULL; @@ -111,10 +113,10 @@ void** PtrQueueSet::allocate_buffer() { void PtrQueueSet::deallocate_buffer(void** buf) { assert(_sz > 0, "Didn't set a buffer size."); - MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); - buf[0] = (void*)_buf_free_list; - _buf_free_list = buf; - _buf_free_list_sz++; + MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); + buf[0] = (void*)_fl_owner->_buf_free_list; + _fl_owner->_buf_free_list = buf; + _fl_owner->_buf_free_list_sz++; } void PtrQueueSet::reduce_free_list() { @@ -207,3 +209,58 @@ void PtrQueueSet::set_buffer_size(size_t sz) { void PtrQueueSet::set_process_completed_threshold(size_t sz) { _process_completed_threshold = sz; } + +// Merge lists of buffers. Notify waiting threads if the length of the list +// exceeds threshold. The source queue is emptied as a result. The queues +// must share the monitor. +void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) { + assert(_cbl_mon == src->_cbl_mon, "Should share the same lock"); + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + if (_completed_buffers_tail == NULL) { + assert(_completed_buffers_head == NULL, "Well-formedness"); + _completed_buffers_head = src->_completed_buffers_head; + _completed_buffers_tail = src->_completed_buffers_tail; + } else { + assert(_completed_buffers_head != NULL, "Well formedness"); + if (src->_completed_buffers_head != NULL) { + _completed_buffers_tail->next = src->_completed_buffers_head; + _completed_buffers_tail = src->_completed_buffers_tail; + } + } + _n_completed_buffers += src->_n_completed_buffers; + + src->_n_completed_buffers = 0; + src->_completed_buffers_head = NULL; + src->_completed_buffers_tail = NULL; + + assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL || + _completed_buffers_head != NULL && _completed_buffers_tail != NULL, + "Sanity"); + + if (!_process_completed && + _n_completed_buffers >= _process_completed_threshold) { + _process_completed = true; + if (_notify_when_complete) + _cbl_mon->notify_all(); + } +} + +// Merge free lists of the two queues. The free list of the source +// queue is emptied as a result. The queues must share the same +// mutex that guards free lists. +void PtrQueueSet::merge_freelists(PtrQueueSet* src) { + assert(_fl_lock == src->_fl_lock, "Should share the same lock"); + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + if (_buf_free_list != NULL) { + void **p = _buf_free_list; + while (*p != NULL) { + p = (void**)*p; + } + *p = src->_buf_free_list; + } else { + _buf_free_list = src->_buf_free_list; + } + _buf_free_list_sz += src->_buf_free_list_sz; + src->_buf_free_list = NULL; + src->_buf_free_list_sz = 0; +} diff --git a/src/share/vm/gc_implementation/g1/ptrQueue.hpp b/src/share/vm/gc_implementation/g1/ptrQueue.hpp index df5557c29..032ee3371 100644 --- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp @@ -155,6 +155,9 @@ protected: Mutex* _fl_lock; void** _buf_free_list; size_t _buf_free_list_sz; + // Queue set can share a freelist. The _fl_owner variable + // specifies the owner. It is set to "this" by default. + PtrQueueSet* _fl_owner; // The size of all buffers in the set. size_t _sz; @@ -188,10 +191,13 @@ public: // Because of init-order concerns, we can't pass these as constructor // arguments. void initialize(Monitor* cbl_mon, Mutex* fl_lock, - int max_completed_queue = 0) { + int max_completed_queue = 0, + PtrQueueSet *fl_owner = NULL) { _max_completed_queue = max_completed_queue; assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?"); - _cbl_mon = cbl_mon; _fl_lock = fl_lock; + _cbl_mon = cbl_mon; + _fl_lock = fl_lock; + _fl_owner = (fl_owner != NULL) ? fl_owner : this; } // Return an empty oop array of size _sz (required to be non-zero). @@ -228,4 +234,7 @@ public: void reduce_free_list(); size_t completed_buffers_num() { return _n_completed_buffers; } + + void merge_bufferlists(PtrQueueSet* src); + void merge_freelists(PtrQueueSet* src); }; diff --git a/src/share/vm/memory/cardTableModRefBS.cpp b/src/share/vm/memory/cardTableModRefBS.cpp index a47d62b7f..7ff64eae1 100644 --- a/src/share/vm/memory/cardTableModRefBS.cpp +++ b/src/share/vm/memory/cardTableModRefBS.cpp @@ -356,18 +356,62 @@ void CardTableModRefBS::write_ref_field_work(void* field, oop newVal) { inline_write_ref_field(field, newVal); } +/* + Claimed and deferred bits are used together in G1 during the evacuation + pause. These bits can have the following state transitions: + 1. The claimed bit can be put over any other card state. Except that + the "dirty -> dirty and claimed" transition is checked for in + G1 code and is not used. + 2. Deferred bit can be set only if the previous state of the card + was either clean or claimed. mark_card_deferred() is wait-free. + We do not care if the operation is be successful because if + it does not it will only result in duplicate entry in the update + buffer because of the "cache-miss". So it's not worth spinning. + */ + bool CardTableModRefBS::claim_card(size_t card_index) { jbyte val = _byte_map[card_index]; - if (val != claimed_card_val()) { - jbyte res = Atomic::cmpxchg((jbyte) claimed_card_val(), &_byte_map[card_index], val); - if (res == val) + assert(val != dirty_card_val(), "Shouldn't claim a dirty card"); + while (val == clean_card_val() || + (val & (clean_card_mask_val() | claimed_card_val())) != claimed_card_val()) { + jbyte new_val = val; + if (val == clean_card_val()) { + new_val = (jbyte)claimed_card_val(); + } else { + new_val = val | (jbyte)claimed_card_val(); + } + jbyte res = Atomic::cmpxchg(new_val, &_byte_map[card_index], val); + if (res == val) { return true; - else return false; + } + val = res; } return false; } +bool CardTableModRefBS::mark_card_deferred(size_t card_index) { + jbyte val = _byte_map[card_index]; + // It's already processed + if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) { + return false; + } + // Cached bit can be installed either on a clean card or on a claimed card. + jbyte new_val = val; + if (val == clean_card_val()) { + new_val = (jbyte)deferred_card_val(); + } else { + if (val & claimed_card_val()) { + new_val = val | (jbyte)deferred_card_val(); + } + } + if (new_val != val) { + Atomic::cmpxchg(new_val, &_byte_map[card_index], val); + } + return true; +} + + void CardTableModRefBS::non_clean_card_iterate(Space* sp, MemRegion mr, DirtyCardToOopClosure* dcto_cl, diff --git a/src/share/vm/memory/cardTableModRefBS.hpp b/src/share/vm/memory/cardTableModRefBS.hpp index 9a48ab549..986230f83 100644 --- a/src/share/vm/memory/cardTableModRefBS.hpp +++ b/src/share/vm/memory/cardTableModRefBS.hpp @@ -52,11 +52,15 @@ class CardTableModRefBS: public ModRefBarrierSet { enum CardValues { clean_card = -1, + // The mask contains zeros in places for all other values. + clean_card_mask = clean_card - 31, + dirty_card = 0, precleaned_card = 1, - claimed_card = 3, - last_card = 4, - CT_MR_BS_last_reserved = 10 + claimed_card = 2, + deferred_card = 4, + last_card = 8, + CT_MR_BS_last_reserved = 16 }; // dirty and precleaned are equivalent wrt younger_refs_iter. @@ -254,9 +258,11 @@ public: }; static int clean_card_val() { return clean_card; } + static int clean_card_mask_val() { return clean_card_mask; } static int dirty_card_val() { return dirty_card; } static int claimed_card_val() { return claimed_card; } static int precleaned_card_val() { return precleaned_card; } + static int deferred_card_val() { return deferred_card; } // For RTTI simulation. bool is_a(BarrierSet::Name bsn) { @@ -329,7 +335,8 @@ public: } bool is_card_claimed(size_t card_index) { - return _byte_map[card_index] == claimed_card_val(); + jbyte val = _byte_map[card_index]; + return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val(); } bool claim_card(size_t card_index); @@ -338,6 +345,13 @@ public: return _byte_map[card_index] == clean_card_val(); } + bool is_card_deferred(size_t card_index) { + jbyte val = _byte_map[card_index]; + return (val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val(); + } + + bool mark_card_deferred(size_t card_index); + // Card marking array base (adjusted for heap low boundary) // This would be the 0th element of _byte_map, if the heap started at 0x0. // But since the heap starts at some higher address, this points to somewhere @@ -434,6 +448,10 @@ public: return byte_for(p) - _byte_map; } + const jbyte* byte_for_index(const size_t card_index) const { + return _byte_map + card_index; + } + void verify(); void verify_guard(); -- GitLab