diff --git a/make/excludeSrc.make b/make/excludeSrc.make index 5ec9566caeacd923be7e68fdc462af45ad80c71c..93f68ee41c035e7f792d79a028aefc7e6a604cd7 100644 --- a/make/excludeSrc.make +++ b/make/excludeSrc.make @@ -1,5 +1,5 @@ # -# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -81,22 +81,25 @@ ifeq ($(INCLUDE_ALL_GCS), false) cmsAdaptiveSizePolicy.cpp cmsCollectorPolicy.cpp \ cmsGCAdaptivePolicyCounters.cpp cmsLockVerifier.cpp compactibleFreeListSpace.cpp \ concurrentMarkSweepGeneration.cpp concurrentMarkSweepThread.cpp \ - freeChunk.cpp adaptiveFreeList.cpp promotionInfo.cpp vmCMSOperations.cpp collectionSetChooser.cpp \ - concurrentG1Refine.cpp concurrentG1RefineThread.cpp concurrentMark.cpp concurrentMarkThread.cpp \ - dirtyCardQueue.cpp g1AllocRegion.cpp g1BlockOffsetTable.cpp g1CollectedHeap.cpp g1GCPhaseTimes.cpp \ - g1CollectorPolicy.cpp g1ErgoVerbose.cpp g1_globals.cpp g1HRPrinter.cpp g1MarkSweep.cpp \ - g1MMUTracker.cpp g1MonitoringSupport.cpp g1RemSet.cpp g1SATBCardTableModRefBS.cpp heapRegion.cpp \ - heapRegionRemSet.cpp heapRegionSeq.cpp heapRegionSet.cpp heapRegionSets.cpp ptrQueue.cpp \ - satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp adjoiningGenerations.cpp \ - adjoiningVirtualSpaces.cpp asPSOldGen.cpp asPSYoungGen.cpp cardTableExtension.cpp \ - gcTaskManager.cpp gcTaskThread.cpp objectStartArray.cpp parallelScavengeHeap.cpp parMarkBitMap.cpp \ - pcTasks.cpp psAdaptiveSizePolicy.cpp psCompactionManager.cpp psGCAdaptivePolicyCounters.cpp \ - psGenerationCounters.cpp psMarkSweep.cpp psMarkSweepDecorator.cpp psOldGen.cpp psParallelCompact.cpp \ - psPromotionLAB.cpp psPromotionManager.cpp psScavenge.cpp psTasks.cpp psVirtualspace.cpp \ - psYoungGen.cpp vmPSOperations.cpp asParNewGeneration.cpp parCardTableModRefBS.cpp \ - parGCAllocBuffer.cpp parNewGeneration.cpp mutableSpace.cpp gSpaceCounters.cpp allocationStats.cpp \ - spaceCounters.cpp gcAdaptivePolicyCounters.cpp mutableNUMASpace.cpp immutableSpace.cpp \ - immutableSpace.cpp g1MemoryPool.cpp psMemoryPool.cpp yieldingWorkGroup.cpp g1Log.cpp + freeChunk.cpp adaptiveFreeList.cpp promotionInfo.cpp vmCMSOperations.cpp \ + collectionSetChooser.cpp concurrentG1Refine.cpp concurrentG1RefineThread.cpp \ + concurrentMark.cpp concurrentMarkThread.cpp dirtyCardQueue.cpp g1AllocRegion.cpp \ + g1BlockOffsetTable.cpp g1CardCounts.cpp g1CollectedHeap.cpp g1CollectorPolicy.cpp \ + g1ErgoVerbose.cpp g1GCPhaseTimes.cpp g1HRPrinter.cpp g1HotCardCache.cpp g1Log.cpp \ + g1MMUTracker.cpp g1MarkSweep.cpp g1MemoryPool.cpp g1MonitoringSupport.cpp \ + g1RemSet.cpp g1SATBCardTableModRefBS.cpp g1_globals.cpp heapRegion.cpp \ + heapRegionRemSet.cpp heapRegionSeq.cpp heapRegionSet.cpp heapRegionSets.cpp \ + ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp \ + adjoiningGenerations.cpp adjoiningVirtualSpaces.cpp asPSOldGen.cpp asPSYoungGen.cpp \ + cardTableExtension.cpp gcTaskManager.cpp gcTaskThread.cpp objectStartArray.cpp \ + parallelScavengeHeap.cpp parMarkBitMap.cpp pcTasks.cpp psAdaptiveSizePolicy.cpp \ + psCompactionManager.cpp psGCAdaptivePolicyCounters.cpp psGenerationCounters.cpp \ + psMarkSweep.cpp psMarkSweepDecorator.cpp psMemoryPool.cpp psOldGen.cpp \ + psParallelCompact.cpp psPromotionLAB.cpp psPromotionManager.cpp psScavenge.cpp \ + psTasks.cpp psVirtualspace.cpp psYoungGen.cpp vmPSOperations.cpp asParNewGeneration.cpp \ + parCardTableModRefBS.cpp parGCAllocBuffer.cpp parNewGeneration.cpp mutableSpace.cpp \ + gSpaceCounters.cpp allocationStats.cpp spaceCounters.cpp gcAdaptivePolicyCounters.cpp \ + mutableNUMASpace.cpp immutableSpace.cpp yieldingWorkGroup.cpp endif ifeq ($(INCLUDE_NMT), false) diff --git a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp index 3eab85e13c8a2eb44fb88a6d82b1bff58f88c057..5f049209e296401ab9ebc0b5008959168f13009c 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,40 +26,12 @@ #include "gc_implementation/g1/concurrentG1Refine.hpp" #include "gc_implementation/g1/concurrentG1RefineThread.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" -#include "gc_implementation/g1/g1CollectorPolicy.hpp" -#include "gc_implementation/g1/g1GCPhaseTimes.hpp" -#include "gc_implementation/g1/g1RemSet.hpp" -#include "gc_implementation/g1/heapRegionSeq.inline.hpp" -#include "memory/space.inline.hpp" -#include "runtime/atomic.hpp" -#include "runtime/java.hpp" -#include "utilities/copy.hpp" +#include "gc_implementation/g1/g1HotCardCache.hpp" -// Possible sizes for the card counts cache: odd primes that roughly double in size. -// (See jvmtiTagMap.cpp). - -#define MAX_SIZE ((size_t) -1) - -size_t ConcurrentG1Refine::_cc_cache_sizes[] = { - 16381, 32771, 76831, 150001, 307261, - 614563, 1228891, 2457733, 4915219, 9830479, - 19660831, 39321619, 78643219, 157286461, MAX_SIZE - }; - -ConcurrentG1Refine::ConcurrentG1Refine() : - _card_counts(NULL), _card_epochs(NULL), - _n_card_counts(0), _max_cards(0), _max_n_card_counts(0), - _cache_size_index(0), _expand_card_counts(false), - _hot_cache(NULL), - _def_use_cache(false), _use_cache(false), - // We initialize the epochs of the array to 0. By initializing - // _n_periods to 1 and not 0 we automatically invalidate all the - // entries on the array. Otherwise we might accidentally think that - // we claimed a card that was in fact never set (see CR7033292). - _n_periods(1), - _threads(NULL), _n_threads(0) +ConcurrentG1Refine::ConcurrentG1Refine(G1CollectedHeap* g1h) : + _threads(NULL), _n_threads(0), + _hot_card_cache(g1h) { - // Ergomonically select initial concurrent refinement parameters if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) { FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2(ParallelGCThreads, 1)); @@ -75,13 +47,17 @@ ConcurrentG1Refine::ConcurrentG1Refine() : FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2); } set_red_zone(MAX2(G1ConcRefinementRedZone, yellow_zone())); + _n_worker_threads = thread_num(); // We need one extra thread to do the young gen rset size sampling. _n_threads = _n_worker_threads + 1; + reset_threshold_step(); _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads, mtGC); + int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids(); + ConcurrentG1RefineThread *next = NULL; for (int i = _n_threads - 1; i >= 0; i--) { ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i); @@ -100,74 +76,8 @@ void ConcurrentG1Refine::reset_threshold_step() { } } -int ConcurrentG1Refine::thread_num() { - return MAX2((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1); -} - void ConcurrentG1Refine::init() { - if (G1ConcRSLogCacheSize > 0) { - _g1h = G1CollectedHeap::heap(); - - _max_cards = _g1h->max_capacity() >> CardTableModRefBS::card_shift; - _max_n_card_counts = _max_cards * G1MaxHotCardCountSizePercent / 100; - - size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1; - guarantee(_max_cards < max_card_num, "card_num representation"); - - // We need _n_card_counts to be less than _max_n_card_counts here - // so that the expansion call (below) actually allocates the - // _counts and _epochs arrays. - assert(_n_card_counts == 0, "pre-condition"); - assert(_max_n_card_counts > 0, "pre-condition"); - - // Find the index into cache size array that is of a size that's - // large enough to hold desired_sz. - size_t desired_sz = _max_cards / InitialCacheFraction; - int desired_sz_index = 0; - while (_cc_cache_sizes[desired_sz_index] < desired_sz) { - desired_sz_index += 1; - assert(desired_sz_index < MAX_CC_CACHE_INDEX, "invariant"); - } - assert(desired_sz_index < MAX_CC_CACHE_INDEX, "invariant"); - - // If the desired_sz value is between two sizes then - // _cc_cache_sizes[desired_sz_index-1] < desired_sz <= _cc_cache_sizes[desired_sz_index] - // we will start with the lower size in the optimistic expectation that - // we will not need to expand up. Note desired_sz_index could also be 0. - if (desired_sz_index > 0 && - _cc_cache_sizes[desired_sz_index] > desired_sz) { - desired_sz_index -= 1; - } - - if (!expand_card_count_cache(desired_sz_index)) { - // Allocation was unsuccessful - exit - vm_exit_during_initialization("Could not reserve enough space for card count cache"); - } - assert(_n_card_counts > 0, "post-condition"); - assert(_cache_size_index == desired_sz_index, "post-condition"); - - Copy::fill_to_bytes(&_card_counts[0], - _n_card_counts * sizeof(CardCountCacheEntry)); - Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry)); - - ModRefBarrierSet* bs = _g1h->mr_bs(); - guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); - _ct_bs = (CardTableModRefBS*)bs; - _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start()); - - _def_use_cache = true; - _use_cache = true; - _hot_cache_size = (1 << G1ConcRSLogCacheSize); - _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC); - _n_hot = 0; - _hot_cache_idx = 0; - - // For refining the cards in the hot cache in parallel - int n_workers = (ParallelGCThreads > 0 ? - _g1h->workers()->total_workers() : 1); - _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers); - _hot_cache_par_claimed_idx = 0; - } + _hot_card_cache.initialize(); } void ConcurrentG1Refine::stop() { @@ -188,17 +98,6 @@ void ConcurrentG1Refine::reinitialize_threads() { } ConcurrentG1Refine::~ConcurrentG1Refine() { - if (G1ConcRSLogCacheSize > 0) { - // Please see the comment in allocate_card_count_cache - // for why we call os::malloc() and os::free() directly. - assert(_card_counts != NULL, "Logic"); - os::free(_card_counts, mtGC); - assert(_card_epochs != NULL, "Logic"); - os::free(_card_epochs, mtGC); - - assert(_hot_cache != NULL, "Logic"); - FREE_C_HEAP_ARRAY(jbyte*, _hot_cache, mtGC); - } if (_threads != NULL) { for (int i = 0; i < _n_threads; i++) { delete _threads[i]; @@ -215,317 +114,10 @@ void ConcurrentG1Refine::threads_do(ThreadClosure *tc) { } } -bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) { - HeapWord* start = _ct_bs->addr_for(card_ptr); - HeapRegion* r = _g1h->heap_region_containing(start); - if (r != NULL && r->is_young()) { - return true; - } - // This card is not associated with a heap region - // so can't be young. - return false; -} - -jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) { - unsigned new_card_num = ptr_2_card_num(card_ptr); - unsigned bucket = hash(new_card_num); - assert(0 <= bucket && bucket < _n_card_counts, "Bounds"); - - CardCountCacheEntry* count_ptr = &_card_counts[bucket]; - CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket]; - - // We have to construct a new entry if we haven't updated the counts - // during the current period, or if the count was updated for a - // different card number. - unsigned int new_epoch = (unsigned int) _n_periods; - julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch); - - while (true) { - // Fetch the previous epoch value - julong prev_epoch_entry = epoch_ptr->_value; - julong cas_res; - - if (extract_epoch(prev_epoch_entry) != new_epoch) { - // This entry has not yet been updated during this period. - // Note: we update the epoch value atomically to ensure - // that there is only one winner that updates the cached - // card_ptr value even though all the refine threads share - // the same epoch value. - - cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry, - (volatile jlong*)&epoch_ptr->_value, - (jlong) prev_epoch_entry); - - if (cas_res == prev_epoch_entry) { - // We have successfully won the race to update the - // epoch and card_num value. Make it look like the - // count and eviction count were previously cleared. - count_ptr->_count = 1; - count_ptr->_evict_count = 0; - *count = 0; - // We can defer the processing of card_ptr - *defer = true; - return card_ptr; - } - // We did not win the race to update the epoch field, so some other - // thread must have done it. The value that gets returned by CAS - // should be the new epoch value. - assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch"); - // We could 'continue' here or just re-read the previous epoch value - prev_epoch_entry = epoch_ptr->_value; - } - - // The epoch entry for card_ptr has been updated during this period. - unsigned old_card_num = extract_card_num(prev_epoch_entry); - - // The card count that will be returned to caller - *count = count_ptr->_count; - - // Are we updating the count for the same card? - if (new_card_num == old_card_num) { - // Same card - just update the count. We could have more than one - // thread racing to update count for the current card. It should be - // OK not to use a CAS as the only penalty should be some missed - // increments of the count which delays identifying the card as "hot". - - if (*count < max_jubyte) count_ptr->_count++; - // We can defer the processing of card_ptr - *defer = true; - return card_ptr; - } - - // Different card - evict old card info - if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++; - if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) { - // Trigger a resize the next time we clear - _expand_card_counts = true; - } - - cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry, - (volatile jlong*)&epoch_ptr->_value, - (jlong) prev_epoch_entry); - - if (cas_res == prev_epoch_entry) { - // We successfully updated the card num value in the epoch entry - count_ptr->_count = 0; // initialize counter for new card num - jbyte* old_card_ptr = card_num_2_ptr(old_card_num); - - // Even though the region containg the card at old_card_num was not - // in the young list when old_card_num was recorded in the epoch - // cache it could have been added to the free list and subsequently - // added to the young list in the intervening time. See CR 6817995. - // We do not deal with this case here - it will be handled in - // HeapRegion::oops_on_card_seq_iterate_careful after it has been - // determined that the region containing the card has been allocated - // to, and it's safe to check the young type of the region. - - // We do not want to defer processing of card_ptr in this case - // (we need to refine old_card_ptr and card_ptr) - *defer = false; - return old_card_ptr; - } - // Someone else beat us - try again. - } -} - -jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) { - int count; - jbyte* cached_ptr = add_card_count(card_ptr, &count, defer); - assert(cached_ptr != NULL, "bad cached card ptr"); - - // We've just inserted a card pointer into the card count cache - // and got back the card that we just inserted or (evicted) the - // previous contents of that count slot. - - // The card we got back could be in a young region. When the - // returned card (if evicted) was originally inserted, we had - // determined that its containing region was not young. However - // it is possible for the region to be freed during a cleanup - // pause, then reallocated and tagged as young which will result - // in the returned card residing in a young region. - // - // We do not deal with this case here - the change from non-young - // to young could be observed at any time - it will be handled in - // HeapRegion::oops_on_card_seq_iterate_careful after it has been - // determined that the region containing the card has been allocated - // to. - - // The card pointer we obtained from card count cache is not hot - // so do not store it in the cache; return it for immediate - // refining. - if (count < G1ConcRSHotCardLimit) { - return cached_ptr; - } - - // Otherwise, the pointer we got from the _card_counts cache is hot. - jbyte* res = NULL; - MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); - if (_n_hot == _hot_cache_size) { - res = _hot_cache[_hot_cache_idx]; - _n_hot--; - } - // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. - _hot_cache[_hot_cache_idx] = cached_ptr; - _hot_cache_idx++; - if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0; - _n_hot++; - - // The card obtained from the hot card cache could be in a young - // region. See above on how this can happen. - - return res; -} - -void ConcurrentG1Refine::clean_up_cache(int worker_i, - G1RemSet* g1rs, - DirtyCardQueue* into_cset_dcq) { - assert(!use_cache(), "cache should be disabled"); - int start_idx; - - while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once - int end_idx = start_idx + _hot_cache_par_chunk_size; - - if (start_idx == - Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { - // The current worker has successfully claimed the chunk [start_idx..end_idx) - end_idx = MIN2(end_idx, _n_hot); - for (int i = start_idx; i < end_idx; i++) { - jbyte* entry = _hot_cache[i]; - if (entry != NULL) { - if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) { - // 'entry' contains references that point into the current - // collection set. We need to record 'entry' in the DCQS - // that's used for that purpose. - // - // The only time we care about recording cards that contain - // references that point into the collection set is during - // RSet updating while within an evacuation pause. - // In this case worker_i should be the id of a GC worker thread - assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause"); - assert(worker_i < (int) (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "incorrect worker id"); - into_cset_dcq->enqueue(entry); - } - } - } - } - } -} - -// The arrays used to hold the card counts and the epochs must have -// a 1:1 correspondence. Hence they are allocated and freed together -// Returns true if the allocations of both the counts and epochs -// were successful; false otherwise. -bool ConcurrentG1Refine::allocate_card_count_cache(size_t n, - CardCountCacheEntry** counts, - CardEpochCacheEntry** epochs) { - // We call the allocation/free routines directly for the counts - // and epochs arrays. The NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY - // macros call AllocateHeap and FreeHeap respectively. - // AllocateHeap will call vm_exit_out_of_memory in the event - // of an allocation failure and abort the JVM. With the - // _counts/epochs arrays we only need to abort the JVM if the - // initial allocation of these arrays fails. - // - // Additionally AllocateHeap/FreeHeap do some tracing of - // allocate/free calls so calling one without calling the - // other can cause inconsistencies in the tracing. So we - // call neither. - - assert(*counts == NULL, "out param"); - assert(*epochs == NULL, "out param"); - - size_t counts_size = n * sizeof(CardCountCacheEntry); - size_t epochs_size = n * sizeof(CardEpochCacheEntry); - - *counts = (CardCountCacheEntry*) os::malloc(counts_size, mtGC); - if (*counts == NULL) { - // allocation was unsuccessful - return false; - } - - *epochs = (CardEpochCacheEntry*) os::malloc(epochs_size, mtGC); - if (*epochs == NULL) { - // allocation was unsuccessful - free counts array - assert(*counts != NULL, "must be"); - os::free(*counts, mtGC); - *counts = NULL; - return false; - } - - // We successfully allocated both counts and epochs - return true; -} - -// Returns true if the card counts/epochs cache was -// successfully expanded; false otherwise. -bool ConcurrentG1Refine::expand_card_count_cache(int cache_size_idx) { - // Can we expand the card count and epoch tables? - if (_n_card_counts < _max_n_card_counts) { - assert(cache_size_idx >= 0 && cache_size_idx < MAX_CC_CACHE_INDEX, "oob"); - - size_t cache_size = _cc_cache_sizes[cache_size_idx]; - // Make sure we don't go bigger than we will ever need - cache_size = MIN2(cache_size, _max_n_card_counts); - - // Should we expand the card count and card epoch tables? - if (cache_size > _n_card_counts) { - // We have been asked to allocate new, larger, arrays for - // the card counts and the epochs. Attempt the allocation - // of both before we free the existing arrays in case - // the allocation is unsuccessful... - CardCountCacheEntry* counts = NULL; - CardEpochCacheEntry* epochs = NULL; - - if (allocate_card_count_cache(cache_size, &counts, &epochs)) { - // Allocation was successful. - // We can just free the old arrays; we're - // not interested in preserving the contents - if (_card_counts != NULL) os::free(_card_counts, mtGC); - if (_card_epochs != NULL) os::free(_card_epochs, mtGC); - - // Cache the size of the arrays and the index that got us there. - _n_card_counts = cache_size; - _cache_size_index = cache_size_idx; - - _card_counts = counts; - _card_epochs = epochs; - - // We successfully allocated/expanded the caches. - return true; - } - } - } - - // We did not successfully expand the caches. - return false; -} - -void ConcurrentG1Refine::clear_and_record_card_counts() { - if (G1ConcRSLogCacheSize == 0) { - return; - } - - double start = os::elapsedTime(); - - if (_expand_card_counts) { - int new_idx = _cache_size_index + 1; - - if (expand_card_count_cache(new_idx)) { - // Allocation was successful and _n_card_counts has - // been updated to the new size. We only need to clear - // the epochs so we don't read a bogus epoch value - // when inserting a card into the hot card cache. - Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry)); - } - _expand_card_counts = false; - } - - int this_epoch = (int) _n_periods; - assert((this_epoch+1) <= max_jint, "to many periods"); - // Update epoch - _n_periods++; - double cc_clear_time_ms = (os::elapsedTime() - start) * 1000; - _g1h->g1_policy()->phase_times()->record_cc_clear_time_ms(cc_clear_time_ms); +int ConcurrentG1Refine::thread_num() { + int n_threads = (G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads + : ParallelGCThreads; + return MAX2(n_threads, 1); } void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const { diff --git a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp index 75a31179af11e6b508593a5453204aabf666ae7f..46e6622eee766d4d851a54fc06e9ba8fa9cfbd50 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,13 +25,15 @@ #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTG1REFINE_HPP #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTG1REFINE_HPP +#include "gc_implementation/g1/g1HotCardCache.hpp" #include "memory/allocation.hpp" -#include "memory/cardTableModRefBS.hpp" #include "runtime/thread.hpp" #include "utilities/globalDefinitions.hpp" // Forward decl class ConcurrentG1RefineThread; +class G1CollectedHeap; +class G1HotCardCache; class G1RemSet; class ConcurrentG1Refine: public CHeapObj { @@ -61,141 +63,14 @@ class ConcurrentG1Refine: public CHeapObj { int _thread_threshold_step; + // We delay the refinement of 'hot' cards using the hot card cache. + G1HotCardCache _hot_card_cache; + // Reset the threshold step value based of the current zone boundaries. void reset_threshold_step(); - // The cache for card refinement. - bool _use_cache; - bool _def_use_cache; - - size_t _n_periods; // Used as clearing epoch - - // An evicting cache of the number of times each card - // is accessed. Reduces, but does not eliminate, the amount - // of duplicated processing of dirty cards. - - enum SomePrivateConstants { - epoch_bits = 32, - card_num_shift = epoch_bits, - epoch_mask = AllBits, - card_num_mask = AllBits, - - // The initial cache size is approximately this fraction - // of a maximal cache (i.e. the size needed for all cards - // in the heap) - InitialCacheFraction = 512 - }; - - const static julong card_num_mask_in_place = - (julong) card_num_mask << card_num_shift; - - typedef struct { - julong _value; // | card_num | epoch | - } CardEpochCacheEntry; - - julong make_epoch_entry(unsigned int card_num, unsigned int epoch) { - assert(0 <= card_num && card_num < _max_cards, "Bounds"); - assert(0 <= epoch && epoch <= _n_periods, "must be"); - - return ((julong) card_num << card_num_shift) | epoch; - } - - unsigned int extract_epoch(julong v) { - return (v & epoch_mask); - } - - unsigned int extract_card_num(julong v) { - return (v & card_num_mask_in_place) >> card_num_shift; - } - - typedef struct { - unsigned char _count; - unsigned char _evict_count; - } CardCountCacheEntry; - - CardCountCacheEntry* _card_counts; - CardEpochCacheEntry* _card_epochs; - - // The current number of buckets in the card count cache - size_t _n_card_counts; - - // The number of cards for the entire reserved heap - size_t _max_cards; - - // The max number of buckets for the card counts and epochs caches. - // This is the maximum that the counts and epochs will grow to. - // It is specified as a fraction or percentage of _max_cards using - // G1MaxHotCardCountSizePercent. - size_t _max_n_card_counts; - - // Possible sizes of the cache: odd primes that roughly double in size. - // (See jvmtiTagMap.cpp). - enum { - MAX_CC_CACHE_INDEX = 15 // maximum index into the cache size array. - }; - - static size_t _cc_cache_sizes[MAX_CC_CACHE_INDEX]; - - // The index in _cc_cache_sizes corresponding to the size of - // _card_counts. - int _cache_size_index; - - bool _expand_card_counts; - - const jbyte* _ct_bot; - - jbyte** _hot_cache; - int _hot_cache_size; - int _n_hot; - int _hot_cache_idx; - - int _hot_cache_par_chunk_size; - volatile int _hot_cache_par_claimed_idx; - - // Needed to workaround 6817995 - CardTableModRefBS* _ct_bs; - G1CollectedHeap* _g1h; - - // Helper routine for expand_card_count_cache(). - // The arrays used to hold the card counts and the epochs must have - // a 1:1 correspondence. Hence they are allocated and freed together. - // Returns true if the allocations of both the counts and epochs - // were successful; false otherwise. - bool allocate_card_count_cache(size_t n, - CardCountCacheEntry** counts, - CardEpochCacheEntry** epochs); - - // Expands the arrays that hold the card counts and epochs - // to the cache size at index. Returns true if the expansion/ - // allocation was successful; false otherwise. - bool expand_card_count_cache(int index); - - // hash a given key (index of card_ptr) with the specified size - static unsigned int hash(size_t key, size_t size) { - return (unsigned int) (key % size); - } - - // hash a given key (index of card_ptr) - unsigned int hash(size_t key) { - return hash(key, _n_card_counts); - } - - unsigned int ptr_2_card_num(jbyte* card_ptr) { - return (unsigned int) (card_ptr - _ct_bot); - } - - jbyte* card_num_2_ptr(unsigned int card_num) { - return (jbyte*) (_ct_bot + card_num); - } - - // Returns the count of this card after incrementing it. - jbyte* add_card_count(jbyte* card_ptr, int* count, bool* defer); - - // Returns true if this card is in a young region - bool is_young_card(jbyte* card_ptr); - public: - ConcurrentG1Refine(); + ConcurrentG1Refine(G1CollectedHeap* g1h); ~ConcurrentG1Refine(); void init(); // Accomplish some initialization that has to wait. @@ -206,34 +81,6 @@ class ConcurrentG1Refine: public CHeapObj { // Iterate over the conc refine threads void threads_do(ThreadClosure *tc); - // If this is the first entry for the slot, writes into the cache and - // returns NULL. If it causes an eviction, returns the evicted pointer. - // Otherwise, its a cache hit, and returns NULL. - jbyte* cache_insert(jbyte* card_ptr, bool* defer); - - // Process the cached entries. - void clean_up_cache(int worker_i, G1RemSet* g1rs, DirtyCardQueue* into_cset_dcq); - - // Set up for parallel processing of the cards in the hot cache - void clear_hot_cache_claimed_index() { - _hot_cache_par_claimed_idx = 0; - } - - // Discard entries in the hot cache. - void clear_hot_cache() { - _hot_cache_idx = 0; _n_hot = 0; - } - - bool hot_cache_is_empty() { return _n_hot == 0; } - - bool use_cache() { return _use_cache; } - void set_use_cache(bool b) { - if (b) _use_cache = _def_use_cache; - else _use_cache = false; - } - - void clear_and_record_card_counts(); - static int thread_num(); void print_worker_threads_on(outputStream* st) const; @@ -250,6 +97,8 @@ class ConcurrentG1Refine: public CHeapObj { int worker_thread_num() const { return _n_worker_threads; } int thread_threshold_step() const { return _thread_threshold_step; } + + G1HotCardCache* hot_card_cache() { return &_hot_card_cache; } }; #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTG1REFINE_HPP diff --git a/src/share/vm/gc_implementation/g1/g1CardCounts.cpp b/src/share/vm/gc_implementation/g1/g1CardCounts.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a055d4f285b092a1c7da112e5afa86240e71345d --- /dev/null +++ b/src/share/vm/gc_implementation/g1/g1CardCounts.cpp @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc_implementation/g1/g1CardCounts.hpp" +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1CollectorPolicy.hpp" +#include "gc_implementation/g1/g1GCPhaseTimes.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "services/memTracker.hpp" +#include "utilities/copy.hpp" + +void G1CardCounts::clear_range(size_t from_card_num, size_t to_card_num) { + if (has_count_table()) { + check_card_num(from_card_num, + err_msg("from card num out of range: "SIZE_FORMAT, from_card_num)); + assert(from_card_num < to_card_num, + err_msg("Wrong order? from: " SIZE_FORMAT ", to: "SIZE_FORMAT, + from_card_num, to_card_num)); + assert(to_card_num <= _committed_max_card_num, + err_msg("to card num out of range: " + "to: "SIZE_FORMAT ", " + "max: "SIZE_FORMAT, + to_card_num, _committed_max_card_num)); + + to_card_num = MIN2(_committed_max_card_num, to_card_num); + + Copy::fill_to_bytes(&_card_counts[from_card_num], (to_card_num - from_card_num)); + } +} + +G1CardCounts::G1CardCounts(G1CollectedHeap *g1h): + _g1h(g1h), _card_counts(NULL), + _reserved_max_card_num(0), _committed_max_card_num(0), + _committed_size(0) {} + +void G1CardCounts::initialize() { + assert(_g1h->max_capacity() > 0, "initialization order"); + assert(_g1h->capacity() == 0, "initialization order"); + + if (G1ConcRSHotCardLimit > 0) { + // The max value we can store in the counts table is + // max_jubyte. Guarantee the value of the hot + // threshold limit is no more than this. + guarantee(G1ConcRSHotCardLimit <= max_jubyte, "sanity"); + + ModRefBarrierSet* bs = _g1h->mr_bs(); + guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); + _ct_bs = (CardTableModRefBS*)bs; + _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start()); + + // Allocate/Reserve the counts table + size_t reserved_bytes = _g1h->max_capacity(); + _reserved_max_card_num = reserved_bytes >> CardTableModRefBS::card_shift; + + size_t reserved_size = _reserved_max_card_num * sizeof(jbyte); + ReservedSpace rs(ReservedSpace::allocation_align_size_up(reserved_size)); + if (!rs.is_reserved()) { + warning("Could not reserve enough space for the card counts table"); + guarantee(!has_reserved_count_table(), "should be NULL"); + return; + } + + MemTracker::record_virtual_memory_type((address)rs.base(), mtGC); + + _card_counts_storage.initialize(rs, 0); + _card_counts = (jubyte*) _card_counts_storage.low(); + } +} + +void G1CardCounts::resize(size_t heap_capacity) { + // Expand the card counts table to handle a heap with the given capacity. + + if (!has_reserved_count_table()) { + // Don't expand if we failed to reserve the card counts table. + return; + } + + assert(_committed_size == + ReservedSpace::allocation_align_size_up(_committed_size), + err_msg("Unaligned? committed_size: " SIZE_FORMAT, _committed_size)); + + // Verify that the committed space for the card counts + // matches our committed max card num. + size_t prev_committed_size = _committed_size; + size_t prev_committed_card_num = prev_committed_size / sizeof(jbyte); + assert(prev_committed_card_num == _committed_max_card_num, + err_msg("Card mismatch: " + "prev: " SIZE_FORMAT ", " + "committed: "SIZE_FORMAT, + prev_committed_card_num, _committed_max_card_num)); + + size_t new_size = (heap_capacity >> CardTableModRefBS::card_shift) * sizeof(jbyte); + size_t new_committed_size = ReservedSpace::allocation_align_size_up(new_size); + size_t new_committed_card_num = + MIN2(_reserved_max_card_num, new_committed_size / sizeof(jbyte)); + + if (_committed_max_card_num < new_committed_card_num) { + // we need to expand the backing store for the card counts + size_t expand_size = new_committed_size - prev_committed_size; + + if (!_card_counts_storage.expand_by(expand_size)) { + warning("Card counts table backing store commit failure"); + return; + } + assert(_card_counts_storage.committed_size() == new_committed_size, + "expansion commit failure"); + + _committed_size = new_committed_size; + _committed_max_card_num = new_committed_card_num; + + clear_range(prev_committed_card_num, _committed_max_card_num); + } +} + +uint G1CardCounts::add_card_count(jbyte* card_ptr) { + // Returns the number of times the card has been refined. + // If we failed to reserve/commit the counts table, return 0. + // If card_ptr is beyond the committed end of the counts table, + // return 0. + // Otherwise return the actual count. + // Unless G1ConcRSHotCardLimit has been set appropriately, + // returning 0 will result in the card being considered + // cold and will be refined immediately. + uint count = 0; + if (has_count_table()) { + size_t card_num = ptr_2_card_num(card_ptr); + if (card_num < _committed_max_card_num) { + count = (uint) _card_counts[card_num]; + if (count < G1ConcRSHotCardLimit) { + _card_counts[card_num] += 1; + } + assert(_card_counts[card_num] <= G1ConcRSHotCardLimit, + err_msg("Refinement count overflow? " + "new count: "UINT32_FORMAT, + (uint) _card_counts[card_num])); + } + } + return count; +} + +bool G1CardCounts::is_hot(uint count) { + return (count >= G1ConcRSHotCardLimit); +} + +void G1CardCounts::clear_region(HeapRegion* hr) { + assert(!hr->isHumongous(), "Should have been cleared"); + if (has_count_table()) { + HeapWord* bottom = hr->bottom(); + + // We use the last address in hr as hr could be the + // last region in the heap. In which case trying to find + // the card for hr->end() will be an OOB accesss to the + // card table. + HeapWord* last = hr->end() - 1; + assert(_g1h->g1_committed().contains(last), + err_msg("last not in committed: " + "last: " PTR_FORMAT ", " + "committed: [" PTR_FORMAT ", " PTR_FORMAT ")", + last, + _g1h->g1_committed().start(), + _g1h->g1_committed().end())); + + const jbyte* from_card_ptr = _ct_bs->byte_for_const(bottom); + const jbyte* last_card_ptr = _ct_bs->byte_for_const(last); + +#ifdef ASSERT + HeapWord* start_addr = _ct_bs->addr_for(from_card_ptr); + assert(start_addr == hr->bottom(), "alignment"); + HeapWord* last_addr = _ct_bs->addr_for(last_card_ptr); + assert((last_addr + CardTableModRefBS::card_size_in_words) == hr->end(), "alignment"); +#endif // ASSERT + + // Clear the counts for the (exclusive) card range. + size_t from_card_num = ptr_2_card_num(from_card_ptr); + size_t to_card_num = ptr_2_card_num(last_card_ptr) + 1; + clear_range(from_card_num, to_card_num); + } +} + +void G1CardCounts::clear_all() { + assert(SafepointSynchronize::is_at_safepoint(), "don't call this otherwise"); + clear_range((size_t)0, _committed_max_card_num); +} + +G1CardCounts::~G1CardCounts() { + if (has_reserved_count_table()) { + _card_counts_storage.release(); + } +} + diff --git a/src/share/vm/gc_implementation/g1/g1CardCounts.hpp b/src/share/vm/gc_implementation/g1/g1CardCounts.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cef297bd234005fc654576dfe989c255e3d25f15 --- /dev/null +++ b/src/share/vm/gc_implementation/g1/g1CardCounts.hpp @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1CARDCOUNTS_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_G1CARDCOUNTS_HPP + +#include "memory/allocation.hpp" +#include "runtime/virtualspace.hpp" +#include "utilities/globalDefinitions.hpp" + +class CardTableModRefBS; +class G1CollectedHeap; +class HeapRegion; + +// Table to track the number of times a card has been refined. Once +// a card has been refined a certain number of times, it is +// considered 'hot' and its refinement is delayed by inserting the +// card into the hot card cache. The card will then be refined when +// it is evicted from the hot card cache, or when the hot card cache +// is 'drained' during the next evacuation pause. + +class G1CardCounts: public CHeapObj { + G1CollectedHeap* _g1h; + + // The table of counts + jubyte* _card_counts; + + // Max capacity of the reserved space for the counts table + size_t _reserved_max_card_num; + + // Max capacity of the committed space for the counts table + size_t _committed_max_card_num; + + // Size of committed space for the counts table + size_t _committed_size; + + // CardTable bottom. + const jbyte* _ct_bot; + + // Barrier set + CardTableModRefBS* _ct_bs; + + // The virtual memory backing the counts table + VirtualSpace _card_counts_storage; + + // Returns true if the card counts table has been reserved. + bool has_reserved_count_table() { return _card_counts != NULL; } + + // Returns true if the card counts table has been reserved and committed. + bool has_count_table() { + return has_reserved_count_table() && _committed_max_card_num > 0; + } + + void check_card_num(size_t card_num, const char* msg) { + assert(card_num >= 0 && card_num < _committed_max_card_num, msg); + } + + size_t ptr_2_card_num(const jbyte* card_ptr) { + assert(card_ptr >= _ct_bot, + err_msg("Inavalied card pointer: " + "card_ptr: " PTR_FORMAT ", " + "_ct_bot: " PTR_FORMAT, + card_ptr, _ct_bot)); + size_t card_num = pointer_delta(card_ptr, _ct_bot, sizeof(jbyte)); + check_card_num(card_num, + err_msg("card pointer out of range: " PTR_FORMAT, card_ptr)); + return card_num; + } + + jbyte* card_num_2_ptr(size_t card_num) { + check_card_num(card_num, + err_msg("card num out of range: "SIZE_FORMAT, card_num)); + return (jbyte*) (_ct_bot + card_num); + } + + // Clear the counts table for the given (exclusive) index range. + void clear_range(size_t from_card_num, size_t to_card_num); + + public: + G1CardCounts(G1CollectedHeap* g1h); + ~G1CardCounts(); + + void initialize(); + + // Resize the committed space for the card counts table in + // response to a resize of the committed space for the heap. + void resize(size_t heap_capacity); + + // Increments the refinement count for the given card. + // Returns the pre-increment count value. + uint add_card_count(jbyte* card_ptr); + + // Returns true if the given count is high enough to be considered + // 'hot'; false otherwise. + bool is_hot(uint count); + + // Clears the card counts for the cards spanned by the region + void clear_region(HeapRegion* hr); + + // Clear the entire card counts table during GC. + // Updates the policy stats with the duration. + void clear_all(); +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1CARDCOUNTS_HPP diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index 7b69a088f2ab9d99758d71086b800d598ace1011..49cde4272381e6a3aca8684ad283ab0641b1048a 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -96,7 +96,7 @@ public: _sts(sts), _g1rs(g1rs), _cg1r(cg1r), _concurrent(true) {} bool do_card_ptr(jbyte* card_ptr, int worker_i) { - bool oops_into_cset = _g1rs->concurrentRefineOneCard(card_ptr, worker_i, false); + bool oops_into_cset = _g1rs->refine_card(card_ptr, worker_i, false); // This path is executed by the concurrent refine or mutator threads, // concurrently, and so we do not care if card_ptr contains references // that point into the collection set. @@ -1452,9 +1452,10 @@ bool G1CollectedHeap::do_collection(bool explicit_gc, _hr_printer.end_gc(true /* full */, (size_t) total_collections()); } - if (_cg1r->use_cache()) { - _cg1r->clear_and_record_card_counts(); - _cg1r->clear_hot_cache(); + G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache(); + if (hot_card_cache->use_cache()) { + hot_card_cache->reset_card_counts(); + hot_card_cache->reset_hot_cache(); } // Rebuild remembered sets of all regions. @@ -1767,6 +1768,8 @@ void G1CollectedHeap::update_committed_space(HeapWord* old_end, Universe::heap()->barrier_set()->resize_covered_region(_g1_committed); // Tell the BOT about the update. _bot_shared->resize(_g1_committed.word_size()); + // Tell the hot card cache about the update + _cg1r->hot_card_cache()->resize_card_counts(capacity()); } bool G1CollectedHeap::expand(size_t expand_bytes) { @@ -1999,7 +2002,7 @@ jint G1CollectedHeap::initialize() { Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap"); Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap"); - _cg1r = new ConcurrentG1Refine(); + _cg1r = new ConcurrentG1Refine(this); // Reserve the maximum. @@ -2060,6 +2063,9 @@ jint G1CollectedHeap::initialize() { (HeapWord*) _g1_reserved.end(), _expansion_regions); + // Do later initialization work for concurrent refinement. + _cg1r->init(); + // 6843694 - ensure that the maximum region index can fit // in the remembered set structures. const uint max_region_idx = (1U << (sizeof(RegionIdx_t)*BitsPerByte-1)) - 1; @@ -2077,20 +2083,20 @@ jint G1CollectedHeap::initialize() { _g1h = this; - _in_cset_fast_test_length = max_regions(); - _in_cset_fast_test_base = + _in_cset_fast_test_length = max_regions(); + _in_cset_fast_test_base = NEW_C_HEAP_ARRAY(bool, (size_t) _in_cset_fast_test_length, mtGC); - // We're biasing _in_cset_fast_test to avoid subtracting the - // beginning of the heap every time we want to index; basically - // it's the same with what we do with the card table. - _in_cset_fast_test = _in_cset_fast_test_base - + // We're biasing _in_cset_fast_test to avoid subtracting the + // beginning of the heap every time we want to index; basically + // it's the same with what we do with the card table. + _in_cset_fast_test = _in_cset_fast_test_base - ((uintx) _g1_reserved.start() >> HeapRegion::LogOfHRGrainBytes); - // Clear the _cset_fast_test bitmap in anticipation of adding - // regions to the incremental collection set for the first - // evacuation pause. - clear_cset_fast_test(); + // Clear the _cset_fast_test bitmap in anticipation of adding + // regions to the incremental collection set for the first + // evacuation pause. + clear_cset_fast_test(); // Create the ConcurrentMark data structure and thread. // (Must do this late, so that "max_regions" is defined.) @@ -2152,9 +2158,6 @@ jint G1CollectedHeap::initialize() { // counts and that mechanism. SpecializationStats::clear(); - // Do later initialization work for concurrent refinement. - _cg1r->init(); - // Here we allocate the dummy full region that is required by the // G1AllocRegion class. If we don't pass an address in the reserved // space here, lots of asserts fire. @@ -2313,7 +2316,8 @@ void G1CollectedHeap::iterate_dirty_card_closure(CardTableEntryClosure* cl, bool concurrent, int worker_i) { // Clean cards in the hot card cache - concurrent_g1_refine()->clean_up_cache(worker_i, g1_rem_set(), into_cset_dcq); + G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache(); + hot_card_cache->drain(worker_i, g1_rem_set(), into_cset_dcq); DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); int n_completed_buffers = 0; @@ -5603,8 +5607,11 @@ void G1CollectedHeap::evacuate_collection_set() { NOT_PRODUCT(set_evacuation_failure_alot_for_current_gc();) g1_rem_set()->prepare_for_oops_into_collection_set_do(); - concurrent_g1_refine()->set_use_cache(false); - concurrent_g1_refine()->clear_hot_cache_claimed_index(); + + // Disable the hot card cache. + G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache(); + hot_card_cache->reset_hot_cache_claimed_index(); + hot_card_cache->set_use_cache(false); uint n_workers; if (G1CollectedHeap::use_parallel_gc_threads()) { @@ -5686,8 +5693,11 @@ void G1CollectedHeap::evacuate_collection_set() { release_gc_alloc_regions(n_workers); g1_rem_set()->cleanup_after_oops_into_collection_set_do(); - concurrent_g1_refine()->clear_hot_cache(); - concurrent_g1_refine()->set_use_cache(true); + // Reset and re-enable the hot card cache. + // Note the counts for the cards in the regions in the + // collection set are reset when the collection set is freed. + hot_card_cache->reset_hot_cache(); + hot_card_cache->set_use_cache(true); finalize_for_evac_failure(); @@ -5749,6 +5759,12 @@ void G1CollectedHeap::free_region(HeapRegion* hr, assert(!hr->is_empty(), "the region should not be empty"); assert(free_list != NULL, "pre-condition"); + // Clear the card counts for this region. + // Note: we only need to do this if the region is not young + // (since we don't refine cards in young regions). + if (!hr->is_young()) { + _cg1r->hot_card_cache()->reset_card_counts(hr); + } *pre_used += hr->used(); hr->hr_clear(par, true /* clear_space */); free_list->add_as_head(hr); diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp index 9782b67c007a5b77a31eb42959d873d99fd168a1..f6d3b6f4bd9b080cb31200c93fa35f89e6b8418f 100644 --- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp +++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -155,11 +155,6 @@ void WorkerDataArray::verify() { G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) : _max_gc_threads(max_gc_threads), - _min_clear_cc_time_ms(-1.0), - _max_clear_cc_time_ms(-1.0), - _cur_clear_cc_time_ms(0.0), - _cum_clear_cc_time_ms(0.0), - _num_cc_clears(0L), _last_gc_worker_start_times_ms(_max_gc_threads, "%.1lf", false), _last_ext_root_scan_times_ms(_max_gc_threads, "%.1lf"), _last_satb_filtering_times_ms(_max_gc_threads, "%.1lf"), @@ -212,11 +207,11 @@ void G1GCPhaseTimes::note_gc_end() { _last_gc_worker_times_ms.set(i, worker_time); double worker_known_time = _last_ext_root_scan_times_ms.get(i) + - _last_satb_filtering_times_ms.get(i) + - _last_update_rs_times_ms.get(i) + - _last_scan_rs_times_ms.get(i) + - _last_obj_copy_times_ms.get(i) + - _last_termination_times_ms.get(i); + _last_satb_filtering_times_ms.get(i) + + _last_update_rs_times_ms.get(i) + + _last_scan_rs_times_ms.get(i) + + _last_obj_copy_times_ms.get(i) + + _last_termination_times_ms.get(i); double worker_other_time = worker_time - worker_known_time; _last_gc_worker_other_times_ms.set(i, worker_other_time); @@ -285,15 +280,6 @@ void G1GCPhaseTimes::print(double pause_time_sec) { } print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms); print_stats(1, "Clear CT", _cur_clear_ct_time_ms); - if (Verbose && G1Log::finest()) { - print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms); - print_stats(1, "Cum Clear CC", _cum_clear_cc_time_ms); - print_stats(1, "Min Clear CC", _min_clear_cc_time_ms); - print_stats(1, "Max Clear CC", _max_clear_cc_time_ms); - if (_num_cc_clears > 0) { - print_stats(1, "Avg Clear CC", _cum_clear_cc_time_ms / ((double)_num_cc_clears)); - } - } double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms(); print_stats(1, "Other", misc_time_ms); if (_cur_verify_before_time_ms > 0.0) { @@ -311,19 +297,3 @@ void G1GCPhaseTimes::print(double pause_time_sec) { print_stats(2, "Verify After", _cur_verify_after_time_ms); } } - -void G1GCPhaseTimes::record_cc_clear_time_ms(double ms) { - if (!(Verbose && G1Log::finest())) { - return; - } - - if (_min_clear_cc_time_ms < 0.0 || ms <= _min_clear_cc_time_ms) { - _min_clear_cc_time_ms = ms; - } - if (_max_clear_cc_time_ms < 0.0 || ms >= _max_clear_cc_time_ms) { - _max_clear_cc_time_ms = ms; - } - _cur_clear_cc_time_ms = ms; - _cum_clear_cc_time_ms += ms; - _num_cc_clears++; -} diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp index b6e289ed22a268f7bdda3a77ea5407405650e12a..2fa5300b1414377e2689bb9f34bbee6bcdbc9e6c 100644 --- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp +++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -133,13 +133,6 @@ class G1GCPhaseTimes : public CHeapObj { double _cur_ref_proc_time_ms; double _cur_ref_enq_time_ms; - // Card Table Count Cache stats - double _min_clear_cc_time_ms; // min - double _max_clear_cc_time_ms; // max - double _cur_clear_cc_time_ms; // clearing time during current pause - double _cum_clear_cc_time_ms; // cummulative clearing time - jlong _num_cc_clears; // number of times the card count cache has been cleared - double _cur_collection_start_sec; double _root_region_scan_wait_time_ms; @@ -227,8 +220,6 @@ class G1GCPhaseTimes : public CHeapObj { _root_region_scan_wait_time_ms = time_ms; } - void record_cc_clear_time_ms(double ms); - void record_young_free_cset_time_ms(double time_ms) { _recorded_young_free_cset_time_ms = time_ms; } diff --git a/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5d224ce8fab07990f7b9d66a3e98de3dd7eefaf0 --- /dev/null +++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc_implementation/g1/dirtyCardQueue.hpp" +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1HotCardCache.hpp" +#include "gc_implementation/g1/g1RemSet.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#include "runtime/atomic.hpp" + +G1HotCardCache::G1HotCardCache(G1CollectedHeap *g1h): + _g1h(g1h), _hot_cache(NULL), _use_cache(false), _card_counts(g1h) {} + +void G1HotCardCache::initialize() { + if (default_use_cache()) { + _use_cache = true; + + _hot_cache_size = (1 << G1ConcRSLogCacheSize); + _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC); + + _n_hot = 0; + _hot_cache_idx = 0; + + // For refining the cards in the hot cache in parallel + int n_workers = (ParallelGCThreads > 0 ? + _g1h->workers()->total_workers() : 1); + _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers); + _hot_cache_par_claimed_idx = 0; + + _card_counts.initialize(); + } +} + +G1HotCardCache::~G1HotCardCache() { + if (default_use_cache()) { + assert(_hot_cache != NULL, "Logic"); + FREE_C_HEAP_ARRAY(jbyte*, _hot_cache, mtGC); + } +} + +jbyte* G1HotCardCache::insert(jbyte* card_ptr) { + uint count = _card_counts.add_card_count(card_ptr); + if (!_card_counts.is_hot(count)) { + // The card is not hot so do not store it in the cache; + // return it for immediate refining. + return card_ptr; + } + + // Otherwise, the card is hot. + jbyte* res = NULL; + MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); + if (_n_hot == _hot_cache_size) { + res = _hot_cache[_hot_cache_idx]; + _n_hot--; + } + + // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. + _hot_cache[_hot_cache_idx] = card_ptr; + _hot_cache_idx++; + + if (_hot_cache_idx == _hot_cache_size) { + // Wrap around + _hot_cache_idx = 0; + } + _n_hot++; + + return res; +} + +void G1HotCardCache::drain(int worker_i, + G1RemSet* g1rs, + DirtyCardQueue* into_cset_dcq) { + if (!default_use_cache()) { + assert(_hot_cache == NULL, "Logic"); + return; + } + + assert(_hot_cache != NULL, "Logic"); + assert(!use_cache(), "cache should be disabled"); + int start_idx; + + while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once + int end_idx = start_idx + _hot_cache_par_chunk_size; + + if (start_idx == + Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { + // The current worker has successfully claimed the chunk [start_idx..end_idx) + end_idx = MIN2(end_idx, _n_hot); + for (int i = start_idx; i < end_idx; i++) { + jbyte* card_ptr = _hot_cache[i]; + if (card_ptr != NULL) { + if (g1rs->refine_card(card_ptr, worker_i, true)) { + // The part of the heap spanned by the card contains references + // that point into the current collection set. + // We need to record the card pointer in the DirtyCardQueueSet + // that we use for such cards. + // + // The only time we care about recording cards that contain + // references that point into the collection set is during + // RSet updating while within an evacuation pause. + // In this case worker_i should be the id of a GC worker thread + assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); + assert(worker_i < (int) (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), + err_msg("incorrect worker id: "INT32_FORMAT, worker_i)); + + into_cset_dcq->enqueue(card_ptr); + } + } + } + } + } + // The existing entries in the hot card cache, which were just refined + // above, are discarded prior to re-enabling the cache near the end of the GC. +} + +void G1HotCardCache::resize_card_counts(size_t heap_capacity) { + _card_counts.resize(heap_capacity); +} + +void G1HotCardCache::reset_card_counts(HeapRegion* hr) { + _card_counts.clear_region(hr); +} + +void G1HotCardCache::reset_card_counts() { + _card_counts.clear_all(); +} diff --git a/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp b/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp new file mode 100644 index 0000000000000000000000000000000000000000..99d700d63a9e9bfd3b92c053458fbc41589bde31 --- /dev/null +++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1HOTCARDCACHE_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_G1HOTCARDCACHE_HPP + +#include "gc_implementation/g1/g1_globals.hpp" +#include "gc_implementation/g1/g1CardCounts.hpp" +#include "memory/allocation.hpp" +#include "runtime/safepoint.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/globalDefinitions.hpp" + +class DirtyCardQueue; +class G1CollectedHeap; +class G1RemSet; +class HeapRegion; + +// An evicting cache of cards that have been logged by the G1 post +// write barrier. Placing a card in the cache delays the refinement +// of the card until the card is evicted, or the cache is drained +// during the next evacuation pause. +// +// The first thing the G1 post write barrier does is to check whether +// the card containing the updated pointer is already dirty and, if +// so, skips the remaining code in the barrier. +// +// Delaying the refinement of a card will make the card fail the +// first is_dirty check in the write barrier, skipping the remainder +// of the write barrier. +// +// This can significantly reduce the overhead of the write barrier +// code, increasing throughput. + +class G1HotCardCache: public CHeapObj { + G1CollectedHeap* _g1h; + + // The card cache table + jbyte** _hot_cache; + + int _hot_cache_size; + int _n_hot; + int _hot_cache_idx; + + int _hot_cache_par_chunk_size; + volatile int _hot_cache_par_claimed_idx; + + bool _use_cache; + + G1CardCounts _card_counts; + + bool default_use_cache() const { + return (G1ConcRSLogCacheSize > 0); + } + + public: + G1HotCardCache(G1CollectedHeap* g1h); + ~G1HotCardCache(); + + void initialize(); + + bool use_cache() { return _use_cache; } + + void set_use_cache(bool b) { + _use_cache = (b ? default_use_cache() : false); + } + + // Returns the card to be refined or NULL. + // + // Increments the count for given the card. if the card is not 'hot', + // it is returned for immediate refining. Otherwise the card is + // added to the hot card cache. + // If there is enough room in the hot card cache for the card we're + // adding, NULL is returned and no further action in needed. + // If we evict a card from the cache to make room for the new card, + // the evicted card is then returned for refinement. + jbyte* insert(jbyte* card_ptr); + + // Refine the cards that have delayed as a result of + // being in the cache. + void drain(int worker_i, G1RemSet* g1rs, DirtyCardQueue* into_cset_dcq); + + // Set up for parallel processing of the cards in the hot cache + void reset_hot_cache_claimed_index() { + _hot_cache_par_claimed_idx = 0; + } + + // Resets the hot card cache and discards the entries. + void reset_hot_cache() { + assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); + assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread"); + _hot_cache_idx = 0; _n_hot = 0; + } + + bool hot_cache_is_empty() { return _n_hot == 0; } + + // Resizes the card counts table to match the given capacity + void resize_card_counts(size_t heap_capacity); + + // Zeros the values in the card counts table for entire committed heap + void reset_card_counts(); + + // Zeros the values in the card counts table for the given region + void reset_card_counts(HeapRegion* hr); +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1HOTCARDCACHE_HPP diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/src/share/vm/gc_implementation/g1/g1RemSet.cpp index e7151071e9f8bd0db3453c6f6689497c3d267c16..d527a3fc61d541d9ca5f6aa1b75b823db3b7ded2 100644 --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,6 +29,7 @@ #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1CollectorPolicy.hpp" +#include "gc_implementation/g1/g1HotCardCache.hpp" #include "gc_implementation/g1/g1GCPhaseTimes.hpp" #include "gc_implementation/g1/g1OopClosures.inline.hpp" #include "gc_implementation/g1/g1RemSet.inline.hpp" @@ -247,7 +248,7 @@ public: assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause"); assert(worker_i < (int) (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "should be a GC worker"); - if (_g1rs->concurrentRefineOneCard(card_ptr, worker_i, true)) { + if (_g1rs->refine_card(card_ptr, worker_i, true)) { // 'card_ptr' contains references that point into the collection // set. We need to record the card in the DCQS // (G1CollectedHeap::into_cset_dirty_card_queue_set()) @@ -288,9 +289,6 @@ void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, #if CARD_REPEAT_HISTO ct_freq_update_histo_and_reset(); #endif - if (worker_i == 0) { - _cg1r->clear_and_record_card_counts(); - } // We cache the value of 'oc' closure into the appropriate slot in the // _cset_rs_update_cl for this worker @@ -396,7 +394,7 @@ public: // RSet updating, // * the post-write barrier shouldn't be logging updates to young // regions (but there is a situation where this can happen - see - // the comment in G1RemSet::concurrentRefineOneCard below - + // the comment in G1RemSet::refine_card() below - // that should not be applicable here), and // * during actual RSet updating, the filtering of cards in young // regions in HeapRegion::oops_on_card_seq_iterate_careful is @@ -502,8 +500,6 @@ void G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm, claim_val); } - - G1TriggerClosure::G1TriggerClosure() : _triggered(false) { } @@ -524,13 +520,91 @@ G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h, _record_refs_into_cset(record_refs_into_cset), _push_ref_cl(push_ref_cl), _worker_i(worker_i) { } -bool G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i, - bool check_for_refs_into_cset) { +// Returns true if the given card contains references that point +// into the collection set, if we're checking for such references; +// false otherwise. + +bool G1RemSet::refine_card(jbyte* card_ptr, int worker_i, + bool check_for_refs_into_cset) { + + // If the card is no longer dirty, nothing to do. + if (*card_ptr != CardTableModRefBS::dirty_card_val()) { + // No need to return that this card contains refs that point + // into the collection set. + return false; + } + // Construct the region representing the card. HeapWord* start = _ct_bs->addr_for(card_ptr); // And find the region containing it. HeapRegion* r = _g1->heap_region_containing(start); - assert(r != NULL, "unexpected null"); + if (r == NULL) { + // Again no need to return that this card contains refs that + // point into the collection set. + return false; // Not in the G1 heap (might be in perm, for example.) + } + + // Why do we have to check here whether a card is on a young region, + // given that we dirty young regions and, as a result, the + // post-barrier is supposed to filter them out and never to enqueue + // them? When we allocate a new region as the "allocation region" we + // actually dirty its cards after we release the lock, since card + // dirtying while holding the lock was a performance bottleneck. So, + // as a result, it is possible for other threads to actually + // allocate objects in the region (after the acquire the lock) + // before all the cards on the region are dirtied. This is unlikely, + // and it doesn't happen often, but it can happen. So, the extra + // check below filters out those cards. + if (r->is_young()) { + return false; + } + + // While we are processing RSet buffers during the collection, we + // actually don't want to scan any cards on the collection set, + // since we don't want to update remebered sets with entries that + // point into the collection set, given that live objects from the + // collection set are about to move and such entries will be stale + // very soon. This change also deals with a reliability issue which + // involves scanning a card in the collection set and coming across + // an array that was being chunked and looking malformed. Note, + // however, that if evacuation fails, we have to scan any objects + // that were not moved and create any missing entries. + if (r->in_collection_set()) { + return false; + } + + // The result from the hot card cache insert call is either: + // * pointer to the current card + // (implying that the current card is not 'hot'), + // * null + // (meaning we had inserted the card ptr into the "hot" card cache, + // which had some headroom), + // * a pointer to a "hot" card that was evicted from the "hot" cache. + // + + G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache(); + if (hot_card_cache->use_cache()) { + assert(!check_for_refs_into_cset, "sanity"); + assert(!SafepointSynchronize::is_at_safepoint(), "sanity"); + + card_ptr = hot_card_cache->insert(card_ptr); + if (card_ptr == NULL) { + // There was no eviction. Nothing to do. + return false; + } + + start = _ct_bs->addr_for(card_ptr); + r = _g1->heap_region_containing(start); + if (r == NULL) { + // Not in the G1 heap + return false; + } + + // Checking whether the region we got back from the cache + // is young here is inappropriate. The region could have been + // freed, reallocated and tagged as young while in the cache. + // Hence we could see its young type change at any time. + } // Don't use addr_for(card_ptr + 1) which can ask for // a card beyond the heap. This is not safe without a perm @@ -610,140 +684,17 @@ bool G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i, _conc_refine_cards++; } - return trigger_cl.triggered(); -} - -bool G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i, - bool check_for_refs_into_cset) { - // If the card is no longer dirty, nothing to do. - if (*card_ptr != CardTableModRefBS::dirty_card_val()) { - // No need to return that this card contains refs that point - // into the collection set. - return false; - } - - // Construct the region representing the card. - HeapWord* start = _ct_bs->addr_for(card_ptr); - // And find the region containing it. - HeapRegion* r = _g1->heap_region_containing(start); - if (r == NULL) { - // Again no need to return that this card contains refs that - // point into the collection set. - return false; // Not in the G1 heap (might be in perm, for example.) - } - // Why do we have to check here whether a card is on a young region, - // given that we dirty young regions and, as a result, the - // post-barrier is supposed to filter them out and never to enqueue - // them? When we allocate a new region as the "allocation region" we - // actually dirty its cards after we release the lock, since card - // dirtying while holding the lock was a performance bottleneck. So, - // as a result, it is possible for other threads to actually - // allocate objects in the region (after the acquire the lock) - // before all the cards on the region are dirtied. This is unlikely, - // and it doesn't happen often, but it can happen. So, the extra - // check below filters out those cards. - if (r->is_young()) { - return false; - } - // While we are processing RSet buffers during the collection, we - // actually don't want to scan any cards on the collection set, - // since we don't want to update remebered sets with entries that - // point into the collection set, given that live objects from the - // collection set are about to move and such entries will be stale - // very soon. This change also deals with a reliability issue which - // involves scanning a card in the collection set and coming across - // an array that was being chunked and looking malformed. Note, - // however, that if evacuation fails, we have to scan any objects - // that were not moved and create any missing entries. - if (r->in_collection_set()) { - return false; - } + // This gets set to true if the card being refined has + // references that point into the collection set. + bool has_refs_into_cset = trigger_cl.triggered(); - // Should we defer processing the card? - // - // Previously the result from the insert_cache call would be - // either card_ptr (implying that card_ptr was currently "cold"), - // null (meaning we had inserted the card ptr into the "hot" - // cache, which had some headroom), or a "hot" card ptr - // extracted from the "hot" cache. - // - // Now that the _card_counts cache in the ConcurrentG1Refine - // instance is an evicting hash table, the result we get back - // could be from evicting the card ptr in an already occupied - // bucket (in which case we have replaced the card ptr in the - // bucket with card_ptr and "defer" is set to false). To avoid - // having a data structure (updates to which would need a lock) - // to hold these unprocessed dirty cards, we need to immediately - // process card_ptr. The actions needed to be taken on return - // from cache_insert are summarized in the following table: - // - // res defer action - // -------------------------------------------------------------- - // null false card evicted from _card_counts & replaced with - // card_ptr; evicted ptr added to hot cache. - // No need to process res; immediately process card_ptr - // - // null true card not evicted from _card_counts; card_ptr added - // to hot cache. - // Nothing to do. - // - // non-null false card evicted from _card_counts & replaced with - // card_ptr; evicted ptr is currently "cold" or - // caused an eviction from the hot cache. - // Immediately process res; process card_ptr. - // - // non-null true card not evicted from _card_counts; card_ptr is - // currently cold, or caused an eviction from hot - // cache. - // Immediately process res; no need to process card_ptr. - - - jbyte* res = card_ptr; - bool defer = false; - - // This gets set to true if the card being refined has references - // that point into the collection set. - bool oops_into_cset = false; - - if (_cg1r->use_cache()) { - jbyte* res = _cg1r->cache_insert(card_ptr, &defer); - if (res != NULL && (res != card_ptr || defer)) { - start = _ct_bs->addr_for(res); - r = _g1->heap_region_containing(start); - if (r != NULL) { - // Checking whether the region we got back from the cache - // is young here is inappropriate. The region could have been - // freed, reallocated and tagged as young while in the cache. - // Hence we could see its young type change at any time. - // - // Process card pointer we get back from the hot card cache. This - // will check whether the region containing the card is young - // _after_ checking that the region has been allocated from. - oops_into_cset = concurrentRefineOneCard_impl(res, worker_i, - false /* check_for_refs_into_cset */); - // The above call to concurrentRefineOneCard_impl is only - // performed if the hot card cache is enabled. This cache is - // disabled during an evacuation pause - which is the only - // time when we need know if the card contains references - // that point into the collection set. Also when the hot card - // cache is enabled, this code is executed by the concurrent - // refine threads - rather than the GC worker threads - and - // concurrentRefineOneCard_impl will return false. - assert(!oops_into_cset, "should not see true here"); - } - } - } - - if (!defer) { - oops_into_cset = - concurrentRefineOneCard_impl(card_ptr, worker_i, check_for_refs_into_cset); - // We should only be detecting that the card contains references - // that point into the collection set if the current thread is - // a GC worker thread. - assert(!oops_into_cset || SafepointSynchronize::is_at_safepoint(), + // We should only be detecting that the card contains references + // that point into the collection set if the current thread is + // a GC worker thread. + assert(!has_refs_into_cset || SafepointSynchronize::is_at_safepoint(), "invalid result at non safepoint"); - } - return oops_into_cset; + + return has_refs_into_cset; } class HRRSStatsIter: public HeapRegionClosure { @@ -846,13 +797,16 @@ void G1RemSet::prepare_for_verify() { DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); dcqs.concatenate_logs(); } - bool cg1r_use_cache = _cg1r->use_cache(); - _cg1r->set_use_cache(false); + + G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache(); + bool use_hot_card_cache = hot_card_cache->use_cache(); + hot_card_cache->set_use_cache(false); + DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set()); updateRS(&into_cset_dcq, 0); _g1->into_cset_dirty_card_queue_set().clear(); - _cg1r->set_use_cache(cg1r_use_cache); + hot_card_cache->set_use_cache(use_hot_card_cache); assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); } } diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.hpp b/src/share/vm/gc_implementation/g1/g1RemSet.hpp index eee6b44708751f0b4578701ede4b96d20af75357..7444ae819fc017d2922028f759fb3f07295e9930 100644 --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -66,14 +66,6 @@ protected: // references into the collection set. OopsInHeapRegionClosure** _cset_rs_update_cl; - // The routine that performs the actual work of refining a dirty - // card. - // If check_for_refs_into_refs is true then a true result is returned - // if the card contains oops that have references into the current - // collection set. - bool concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i, - bool check_for_refs_into_cset); - public: // This is called to reset dual hash tables after the gc pause // is finished and the initial hash table is no longer being @@ -90,8 +82,7 @@ public: // function can be helpful in partitioning the work to be done. It // should be the same as the "i" passed to the calling thread's // work(i) function. In the sequential case this param will be ingored. - void oops_into_collection_set_do(OopsInHeapRegionClosure* blk, - int worker_i); + void oops_into_collection_set_do(OopsInHeapRegionClosure* blk, int worker_i); // Prepare for and cleanup after an oops_into_collection_set_do // call. Must call each of these once before and after (in sequential @@ -124,14 +115,13 @@ public: void scrub_par(BitMap* region_bm, BitMap* card_bm, uint worker_num, int claim_val); - // Refine the card corresponding to "card_ptr". If "sts" is non-NULL, - // join and leave around parts that must be atomic wrt GC. (NULL means - // being done at a safepoint.) + // Refine the card corresponding to "card_ptr". // If check_for_refs_into_cset is true, a true result is returned // if the given card contains oops that have references into the // current collection set. - virtual bool concurrentRefineOneCard(jbyte* card_ptr, int worker_i, - bool check_for_refs_into_cset); + virtual bool refine_card(jbyte* card_ptr, + int worker_i, + bool check_for_refs_into_cset); // Print any relevant summary info. virtual void print_summary_info(); diff --git a/src/share/vm/gc_implementation/g1/g1_globals.hpp b/src/share/vm/gc_implementation/g1/g1_globals.hpp index 8bd099bd41af445d090b27be466e2caa7d886270..7e62b70cdca82fe7dc129201d52e651cf434f52a 100644 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp @@ -163,16 +163,12 @@ "Select green, yellow and red zones adaptively to meet the " \ "the pause requirements.") \ \ - develop(intx, G1ConcRSLogCacheSize, 10, \ + product(uintx, G1ConcRSLogCacheSize, 10, \ "Log base 2 of the length of conc RS hot-card cache.") \ \ - develop(intx, G1ConcRSHotCardLimit, 4, \ + product(uintx, G1ConcRSHotCardLimit, 4, \ "The threshold that defines (>=) a hot card.") \ \ - develop(intx, G1MaxHotCardCountSizePercent, 25, \ - "The maximum size of the hot card count cache as a " \ - "percentage of the number of cards for the maximum heap.") \ - \ develop(bool, G1PrintOopAppls, false, \ "When true, print applications of closures to external locs.") \ \ @@ -247,10 +243,6 @@ "If non-0 is the number of parallel rem set update threads, " \ "otherwise the value is determined ergonomically.") \ \ - develop(intx, G1CardCountCacheExpandThreshold, 16, \ - "Expand the card count cache if the number of collisions for " \ - "a particular entry exceeds this value.") \ - \ develop(bool, G1VerifyCTCleanup, false, \ "Verify card table cleanup.") \ \ diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp index dcb74c417f7c6ba55bde5f11f1e9755f620eed41..bf149fcd0079ffa976f24ddf3774ecddbd827920 100644 --- a/src/share/vm/runtime/arguments.cpp +++ b/src/share/vm/runtime/arguments.cpp @@ -2051,6 +2051,10 @@ bool Arguments::check_vm_args_consistency() { "G1RefProcDrainInterval"); status = status && verify_min_value((intx)G1ConcMarkStepDurationMillis, 1, "G1ConcMarkStepDurationMillis"); + status = status && verify_interval(G1ConcRSHotCardLimit, 0, max_jubyte, + "G1ConcRSHotCardLimit"); + status = status && verify_interval(G1ConcRSLogCacheSize, 0, 31, + "G1ConcRSLogCacheSize"); } #endif // INCLUDE_ALL_GCS