diff --git a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp index 3473fac01866d42a34e5bb981c590b01639636d3..4b6aafbd2fdc7103511855deb8c7b948ad904541 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp @@ -57,8 +57,8 @@ size_t ConcurrentG1Refine::thread_num() { } void ConcurrentG1Refine::init() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { - G1CollectedHeap* g1h = G1CollectedHeap::heap(); _n_card_counts = (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift); _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts); @@ -83,6 +83,12 @@ void ConcurrentG1Refine::init() { _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size); _n_hot = 0; _hot_cache_idx = 0; + + // Set up parallel refinement of the hot cache: the chunk size handed out per claim (at least 1) and the shared claim index + int n_workers = (ParallelGCThreads > 0 ? 
+ g1h->workers()->total_workers() : 1); + _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers); + _hot_cache_par_claimed_idx = 0; } } @@ -161,17 +167,23 @@ jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) { void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) { assert(!use_cache(), "cache should be disabled"); - int start_ind = _hot_cache_idx-1; - for (int i = 0; i < _n_hot; i++) { - int ind = start_ind - i; - if (ind < 0) ind = ind + _hot_cache_size; - jbyte* entry = _hot_cache[ind]; - if (entry != NULL) { - g1rs->concurrentRefineOneCard(entry, worker_i); + int start_idx; + + while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read the claim index once per attempt + int end_idx = start_idx + _hot_cache_par_chunk_size; + + if (start_idx == + Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { + // The current worker has successfully claimed the chunk [start_idx..end_idx); clamp it so the loop stops at _n_hot + end_idx = MIN2(end_idx, _n_hot); + for (int i = start_idx; i < end_idx; i++) { + jbyte* entry = _hot_cache[i]; + if (entry != NULL) { + g1rs->concurrentRefineOneCard(entry, worker_i); + } + } } } - _n_hot = 0; - _hot_cache_idx = 0; } void ConcurrentG1Refine::clear_and_record_card_counts() { diff --git a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp index 830e19ee6338aa63d26bb03cdb96954195f1362b..27999644807421b05159b5c14448fa08294527ff 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp @@ -36,15 +36,19 @@ class ConcurrentG1Refine: public CHeapObj { size_t _total_cards; size_t _total_travs; - unsigned char* _card_counts; - unsigned _n_card_counts; - const jbyte* _ct_bot; - unsigned* _cur_card_count_histo; - unsigned* _cum_card_count_histo; - jbyte** _hot_cache; - int _hot_cache_size; - int _n_hot; - int _hot_cache_idx; + unsigned char* _card_counts; + unsigned _n_card_counts; + const jbyte* _ct_bot; + unsigned* 
+ _cur_card_count_histo; + unsigned* _cum_card_count_histo; + + jbyte** _hot_cache; + int _hot_cache_size; + int _n_hot; + int _hot_cache_idx; + + int _hot_cache_par_chunk_size; + volatile int _hot_cache_par_claimed_idx; // start index of the next chunk to claim; updated with Atomic::cmpxchg in clean_up_cache // Returns the count of this card after incrementing it. int add_card_count(jbyte* card_ptr); @@ -70,6 +74,11 @@ class ConcurrentG1Refine: public CHeapObj { // Process the cached entries. void clean_up_cache(int worker_i, G1RemSet* g1rs); + // Reset the claim index to 0 so that the next parallel pass over the hot cache starts at its first entry + void clear_hot_cache_claimed_index() { + _hot_cache_par_claimed_idx = 0; + } + // Discard entries in the hot cache. void clear_hot_cache() { _hot_cache_idx = 0; _n_hot = 0; diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index 992df850a4c32f0e51cb513a86b7ed0e43003302..86872d6e0a68e4ecc44b161fec302b89fb3285c2 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -1637,6 +1637,9 @@ size_t G1CollectedHeap::capacity() const { void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent, int worker_i) { + // Clean cards in the hot card cache; each worker that gets here claims and processes chunks of it + concurrent_g1_refine()->clean_up_cache(worker_i, g1_rem_set()); + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); int n_completed_buffers = 0; while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) { @@ -1645,9 +1648,6 @@ void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent, g1_policy()->record_update_rs_processed_buffers(worker_i, (double) n_completed_buffers); dcqs.clear_n_completed_buffers(); - // Finish up the queue... 
- if (worker_i == 0) concurrent_g1_refine()->clean_up_cache(worker_i, - g1_rem_set()); assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!"); } @@ -4111,6 +4111,8 @@ void G1CollectedHeap::evacuate_collection_set() { g1_rem_set()->prepare_for_oops_into_collection_set_do(); concurrent_g1_refine()->set_use_cache(false); + concurrent_g1_refine()->clear_hot_cache_claimed_index(); + int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); set_par_threads(n_workers); G1ParTask g1_par_task(this, n_workers, _task_queues); @@ -4143,6 +4145,7 @@ void G1CollectedHeap::evacuate_collection_set() { } g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + concurrent_g1_refine()->clear_hot_cache(); concurrent_g1_refine()->set_use_cache(true); finalize_for_evac_failure();