提交 8763012d 作者: johnc

6865703: G1: Parallelize hot card cache cleanup

Summary: Have the GC worker threads clean up the hot card cache in parallel: each worker thread atomically claims a chunk of the hot card cache and refines the cards in that chunk. The size of the chunks that each thread will claim is determined once at VM initialization as the size of the hot card cache divided by the number of worker threads (with a minimum of 1).
Reviewed-by: jmasa, tonyp
上级 78e710c7
...@@ -57,8 +57,8 @@ size_t ConcurrentG1Refine::thread_num() { ...@@ -57,8 +57,8 @@ size_t ConcurrentG1Refine::thread_num() {
} }
void ConcurrentG1Refine::init() { void ConcurrentG1Refine::init() {
if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
G1CollectedHeap* g1h = G1CollectedHeap::heap(); G1CollectedHeap* g1h = G1CollectedHeap::heap();
if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
_n_card_counts = _n_card_counts =
(unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift); (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);
_card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts); _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts);
...@@ -83,6 +83,12 @@ void ConcurrentG1Refine::init() { ...@@ -83,6 +83,12 @@ void ConcurrentG1Refine::init() {
_hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size); _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
_n_hot = 0; _n_hot = 0;
_hot_cache_idx = 0; _hot_cache_idx = 0;
// For refining the cards in the hot cache in parallel
int n_workers = (ParallelGCThreads > 0 ?
g1h->workers()->total_workers() : 1);
_hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
_hot_cache_par_claimed_idx = 0;
} }
} }
...@@ -161,17 +167,23 @@ jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) { ...@@ -161,17 +167,23 @@ jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) {
void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) { void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
assert(!use_cache(), "cache should be disabled"); assert(!use_cache(), "cache should be disabled");
int start_ind = _hot_cache_idx-1; int start_idx;
for (int i = 0; i < _n_hot; i++) {
int ind = start_ind - i; while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
if (ind < 0) ind = ind + _hot_cache_size; int end_idx = start_idx + _hot_cache_par_chunk_size;
jbyte* entry = _hot_cache[ind];
if (start_idx ==
Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
// The current worker has successfully claimed the chunk [start_idx..end_idx)
end_idx = MIN2(end_idx, _n_hot);
for (int i = start_idx; i < end_idx; i++) {
jbyte* entry = _hot_cache[i];
if (entry != NULL) { if (entry != NULL) {
g1rs->concurrentRefineOneCard(entry, worker_i); g1rs->concurrentRefineOneCard(entry, worker_i);
} }
} }
_n_hot = 0; }
_hot_cache_idx = 0; }
} }
void ConcurrentG1Refine::clear_and_record_card_counts() { void ConcurrentG1Refine::clear_and_record_card_counts() {
......
...@@ -41,11 +41,15 @@ class ConcurrentG1Refine: public CHeapObj { ...@@ -41,11 +41,15 @@ class ConcurrentG1Refine: public CHeapObj {
const jbyte* _ct_bot; const jbyte* _ct_bot;
unsigned* _cur_card_count_histo; unsigned* _cur_card_count_histo;
unsigned* _cum_card_count_histo; unsigned* _cum_card_count_histo;
jbyte** _hot_cache; jbyte** _hot_cache;
int _hot_cache_size; int _hot_cache_size;
int _n_hot; int _n_hot;
int _hot_cache_idx; int _hot_cache_idx;
int _hot_cache_par_chunk_size;
volatile int _hot_cache_par_claimed_idx;
// Returns the count of this card after incrementing it. // Returns the count of this card after incrementing it.
int add_card_count(jbyte* card_ptr); int add_card_count(jbyte* card_ptr);
...@@ -70,6 +74,11 @@ class ConcurrentG1Refine: public CHeapObj { ...@@ -70,6 +74,11 @@ class ConcurrentG1Refine: public CHeapObj {
// Process the cached entries. // Process the cached entries.
void clean_up_cache(int worker_i, G1RemSet* g1rs); void clean_up_cache(int worker_i, G1RemSet* g1rs);
// Set up for parallel processing of the cards in the hot cache.
// Resets the shared claim index to 0 so that the chunk claiming done in
// clean_up_cache() starts again from the first hot cache entry.
// G1CollectedHeap::evacuate_collection_set() calls this before it creates
// the parallel task.
// NOTE(review): this is a plain store, not an atomic update; presumably it
// must only be called while no worker is inside clean_up_cache() - confirm.
void clear_hot_cache_claimed_index() {
_hot_cache_par_claimed_idx = 0;
}
// Discard entries in the hot cache. // Discard entries in the hot cache.
void clear_hot_cache() { void clear_hot_cache() {
_hot_cache_idx = 0; _n_hot = 0; _hot_cache_idx = 0; _n_hot = 0;
......
...@@ -1637,6 +1637,9 @@ size_t G1CollectedHeap::capacity() const { ...@@ -1637,6 +1637,9 @@ size_t G1CollectedHeap::capacity() const {
void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent, void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent,
int worker_i) { int worker_i) {
// Clean cards in the hot card cache
concurrent_g1_refine()->clean_up_cache(worker_i, g1_rem_set());
DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
int n_completed_buffers = 0; int n_completed_buffers = 0;
while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) { while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) {
...@@ -1645,9 +1648,6 @@ void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent, ...@@ -1645,9 +1648,6 @@ void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent,
g1_policy()->record_update_rs_processed_buffers(worker_i, g1_policy()->record_update_rs_processed_buffers(worker_i,
(double) n_completed_buffers); (double) n_completed_buffers);
dcqs.clear_n_completed_buffers(); dcqs.clear_n_completed_buffers();
// Finish up the queue...
if (worker_i == 0) concurrent_g1_refine()->clean_up_cache(worker_i,
g1_rem_set());
assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!"); assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!");
} }
...@@ -4111,6 +4111,8 @@ void G1CollectedHeap::evacuate_collection_set() { ...@@ -4111,6 +4111,8 @@ void G1CollectedHeap::evacuate_collection_set() {
g1_rem_set()->prepare_for_oops_into_collection_set_do(); g1_rem_set()->prepare_for_oops_into_collection_set_do();
concurrent_g1_refine()->set_use_cache(false); concurrent_g1_refine()->set_use_cache(false);
concurrent_g1_refine()->clear_hot_cache_claimed_index();
int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
set_par_threads(n_workers); set_par_threads(n_workers);
G1ParTask g1_par_task(this, n_workers, _task_queues); G1ParTask g1_par_task(this, n_workers, _task_queues);
...@@ -4143,6 +4145,7 @@ void G1CollectedHeap::evacuate_collection_set() { ...@@ -4143,6 +4145,7 @@ void G1CollectedHeap::evacuate_collection_set() {
} }
g1_rem_set()->cleanup_after_oops_into_collection_set_do(); g1_rem_set()->cleanup_after_oops_into_collection_set_do();
concurrent_g1_refine()->clear_hot_cache();
concurrent_g1_refine()->set_use_cache(true); concurrent_g1_refine()->set_use_cache(true);
finalize_for_evac_failure(); finalize_for_evac_failure();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册