提交 88955d77 编写于 作者: I iveresov

6923991: G1: improve scalability of RSet scanning

Summary: Implemented block-based work stealing. Moved copying during the rset scanning phase to the main copying phase. Made the size of rset table depend on the region size.
Reviewed-by: apetrusenko, tonyp
上级 57176051
......@@ -2646,6 +2646,13 @@ G1CollectedHeap::cleanup_surviving_young_words() {
// </NEW PREDICTION>
struct PrepareForRSScanningClosure : public HeapRegionClosure {
bool doHeapRegion(HeapRegion *r) {
r->rem_set()->set_iter_claimed(0);
return false;
}
};
void
G1CollectedHeap::do_collection_pause_at_safepoint() {
if (PrintHeapAtGC) {
......@@ -2784,6 +2791,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint() {
gclog_or_tty->print_cr("\nAfter pause, heap:");
print();
#endif
PrepareForRSScanningClosure prepare_for_rs_scan;
collection_set_iterate(&prepare_for_rs_scan);
setup_surviving_young_words();
......@@ -3781,22 +3790,16 @@ oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
return obj;
}
template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee, bool skip_cset_test>
template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
template <class T>
void G1ParCopyClosure <do_gen_barrier, barrier, do_mark_forwardee, skip_cset_test>
void G1ParCopyClosure <do_gen_barrier, barrier, do_mark_forwardee>
::do_oop_work(T* p) {
oop obj = oopDesc::load_decode_heap_oop(p);
assert(barrier != G1BarrierRS || obj != NULL,
"Precondition: G1BarrierRS implies obj is nonNull");
// The only time we skip the cset test is when we're scanning
// references popped from the queue. And we only push on the queue
// references that we know point into the cset, so no point in
// checking again. But we'll leave an assert here for peace of mind.
assert(!skip_cset_test || _g1->obj_in_cs(obj), "invariant");
// here the null check is implicit in the cset_fast_test() test
if (skip_cset_test || _g1->in_cset_fast_test(obj)) {
if (_g1->in_cset_fast_test(obj)) {
#if G1_REM_SET_LOGGING
gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" "
"into CS.", p, (void*) obj);
......@@ -3813,7 +3816,6 @@ void G1ParCopyClosure <do_gen_barrier, barrier, do_mark_forwardee, skip_cset_tes
}
}
// When scanning moved objs, must look at all oops.
if (barrier == G1BarrierEvac && obj != NULL) {
_par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
}
......@@ -3823,8 +3825,8 @@ void G1ParCopyClosure <do_gen_barrier, barrier, do_mark_forwardee, skip_cset_tes
}
}
template void G1ParCopyClosure<false, G1BarrierEvac, false, true>::do_oop_work(oop* p);
template void G1ParCopyClosure<false, G1BarrierEvac, false, true>::do_oop_work(narrowOop* p);
template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(oop* p);
template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(narrowOop* p);
template <class T> void G1ParScanPartialArrayClosure::do_oop_nv(T* p) {
assert(has_partial_array_mask(p), "invariant");
......@@ -3896,11 +3898,11 @@ public:
assert(UseCompressedOops, "Error");
narrowOop* p = (narrowOop*) stolen_task;
assert(has_partial_array_mask(p) ||
_g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "Error");
_g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "Error");
pss->push_on_queue(p);
} else {
oop* p = (oop*) stolen_task;
assert(has_partial_array_mask(p) || _g1h->obj_in_cs(*p), "Error");
assert(has_partial_array_mask(p) || _g1h->is_in_g1_reserved(*p), "Error");
pss->push_on_queue(p);
}
continue;
......@@ -3962,6 +3964,7 @@ public:
G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss);
G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss);
G1ParScanHeapRSClosure only_scan_heap_rs_cl(_g1h, &pss);
G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss);
G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss);
G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss);
......@@ -3985,7 +3988,7 @@ public:
_g1h->g1_process_strong_roots(/* not collecting perm */ false,
SharedHeap::SO_AllClasses,
scan_root_cl,
&only_scan_heap_rs_cl,
&push_heap_rs_cl,
scan_so_cl,
scan_perm_cl,
i);
......
......@@ -1623,7 +1623,7 @@ public:
template <class T> void push_on_queue(T* ref) {
assert(ref != NULL, "invariant");
assert(has_partial_array_mask(ref) ||
_g1h->obj_in_cs(oopDesc::load_decode_heap_oop(ref)), "invariant");
_g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(ref)), "invariant");
#ifdef ASSERT
if (has_partial_array_mask(ref)) {
oop p = clear_partial_array_mask(ref);
......@@ -1644,9 +1644,9 @@ public:
assert((oop*)ref != NULL, "pop_local() returned true");
assert(UseCompressedOops || !ref.is_narrow(), "Error");
assert(has_partial_array_mask((oop*)ref) ||
_g1h->obj_in_cs(ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)ref)
: oopDesc::load_decode_heap_oop((oop*)ref)),
"invariant");
_g1h->is_in_g1_reserved(ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)ref)
: oopDesc::load_decode_heap_oop((oop*)ref)),
"invariant");
IF_G1_DETAILED_STATS(note_pop());
} else {
StarTask null_task;
......@@ -1659,9 +1659,9 @@ public:
assert((oop*)new_ref != NULL, "pop() from a local non-empty stack");
assert(UseCompressedOops || !new_ref.is_narrow(), "Error");
assert(has_partial_array_mask((oop*)new_ref) ||
_g1h->obj_in_cs(new_ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)new_ref)
: oopDesc::load_decode_heap_oop((oop*)new_ref)),
"invariant");
_g1h->is_in_g1_reserved(new_ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)new_ref)
: oopDesc::load_decode_heap_oop((oop*)new_ref)),
"invariant");
ref = new_ref;
}
......@@ -1825,12 +1825,12 @@ public:
assert(UseCompressedOops, "Error");
narrowOop* p = (narrowOop*)ref_to_scan;
assert(!has_partial_array_mask(p) &&
_g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
_g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "sanity");
deal_with_reference(p);
} else {
oop* p = (oop*)ref_to_scan;
assert((has_partial_array_mask(p) && _g1h->obj_in_cs(clear_partial_array_mask(p))) ||
_g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
assert((has_partial_array_mask(p) && _g1h->is_in_g1_reserved(clear_partial_array_mask(p))) ||
_g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "sanity");
deal_with_reference(p);
}
}
......@@ -1844,12 +1844,12 @@ public:
assert(UseCompressedOops, "Error");
narrowOop* p = (narrowOop*)ref_to_scan;
assert(!has_partial_array_mask(p) &&
_g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
_g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "sanity");
deal_with_reference(p);
} else {
oop* p = (oop*)ref_to_scan;
assert((has_partial_array_mask(p) && _g1h->obj_in_cs(clear_partial_array_mask(p))) ||
_g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
_g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "sanity");
deal_with_reference(p);
}
}
......
......@@ -205,6 +205,7 @@ G1CollectorPolicy::G1CollectorPolicy() :
// policy is created before the heap, we have to set this up here,
// so it's done as soon as possible.
HeapRegion::setup_heap_region_size(Arguments::min_heap_size());
HeapRegionRemSet::setup_remset_size();
_recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime());
_prev_collection_pause_end_ms = os::elapsedTime() * 1000.0;
......
......@@ -53,6 +53,15 @@ public:
bool apply_to_weak_ref_discovered_field() { return true; }
};
class G1ParPushHeapRSClosure : public G1ParClosureSuper {
public:
G1ParPushHeapRSClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
G1ParClosureSuper(g1, par_scan_state) { }
template <class T> void do_oop_nv(T* p);
virtual void do_oop(oop* p) { do_oop_nv(p); }
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
class G1ParScanClosure : public G1ParClosureSuper {
public:
G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
......@@ -100,7 +109,7 @@ public:
};
template<bool do_gen_barrier, G1Barrier barrier,
bool do_mark_forwardee, bool skip_cset_test>
bool do_mark_forwardee>
class G1ParCopyClosure : public G1ParCopyHelper {
G1ParScanClosure _scanner;
template <class T> void do_oop_work(T* p);
......@@ -116,12 +125,13 @@ public:
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
typedef G1ParCopyClosure<false, G1BarrierNone, false, false> G1ParScanExtRootClosure;
typedef G1ParCopyClosure<true, G1BarrierNone, false, false> G1ParScanPermClosure;
typedef G1ParCopyClosure<false, G1BarrierRS, false, false> G1ParScanHeapRSClosure;
typedef G1ParCopyClosure<false, G1BarrierNone, true, false> G1ParScanAndMarkExtRootClosure;
typedef G1ParCopyClosure<true, G1BarrierNone, true, false> G1ParScanAndMarkPermClosure;
typedef G1ParCopyClosure<false, G1BarrierRS, true, false> G1ParScanAndMarkHeapRSClosure;
typedef G1ParCopyClosure<false, G1BarrierNone, false> G1ParScanExtRootClosure;
typedef G1ParCopyClosure<true, G1BarrierNone, false> G1ParScanPermClosure;
typedef G1ParCopyClosure<false, G1BarrierRS, false> G1ParScanHeapRSClosure;
typedef G1ParCopyClosure<false, G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
typedef G1ParCopyClosure<true, G1BarrierNone, true> G1ParScanAndMarkPermClosure;
typedef G1ParCopyClosure<false, G1BarrierRS, true> G1ParScanAndMarkHeapRSClosure;
// This is the only case when we set skip_cset_test. Basically, this
// closure is (should?) only be called directly while we're draining
// the overflow and task queues. In that case we know that the
......@@ -132,7 +142,7 @@ typedef G1ParCopyClosure<false, G1BarrierRS, true, false> G1ParScanAndMarkHea
// We need a separate closure to handle references during evacuation
// failure processing, as we cannot asume that the reference already
// points into the collection set (like G1ParScanHeapEvacClosure does).
typedef G1ParCopyClosure<false, G1BarrierEvac, false, false> G1ParScanHeapEvacFailureClosure;
typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacFailureClosure;
class FilterIntoCSClosure: public OopClosure {
G1CollectedHeap* _g1;
......
......@@ -104,3 +104,16 @@ template <class T> inline void G1ParScanClosure::do_oop_nv(T* p) {
}
}
}
template <class T> inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
T heap_oop = oopDesc::load_heap_oop(p);
if (!oopDesc::is_null(heap_oop)) {
oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
if (_g1->in_cset_fast_test(obj)) {
Prefetch::write(obj->mark_addr(), 0);
Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
_par_scan_state->push_on_queue(p);
}
}
}
......@@ -155,8 +155,8 @@ class ScanRSClosure : public HeapRegionClosure {
G1BlockOffsetSharedArray* _bot_shared;
CardTableModRefBS *_ct_bs;
int _worker_i;
int _block_size;
bool _try_claimed;
size_t _min_skip_distance, _max_skip_distance;
public:
ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
_oc(oc),
......@@ -168,8 +168,7 @@ public:
_g1h = G1CollectedHeap::heap();
_bot_shared = _g1h->bot_shared();
_ct_bs = (CardTableModRefBS*) (_g1h->barrier_set());
_min_skip_distance = 16;
_max_skip_distance = 2 * _g1h->n_par_threads() * _min_skip_distance;
_block_size = MAX2<int>(G1RSetScanBlockSize, 1);
}
void set_try_claimed() { _try_claimed = true; }
......@@ -225,12 +224,15 @@ public:
HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i);
hrrs->init_iterator(iter);
size_t card_index;
size_t skip_distance = 0, current_card = 0, jump_to_card = 0;
while (iter->has_next(card_index)) {
if (current_card < jump_to_card) {
++current_card;
continue;
// We claim cards in block so as to recude the contention. The block size is determined by
// the G1RSetScanBlockSize parameter.
size_t jump_to_card = hrrs->iter_claimed_next(_block_size);
for (size_t current_card = 0; iter->has_next(card_index); current_card++) {
if (current_card >= jump_to_card + _block_size) {
jump_to_card = hrrs->iter_claimed_next(_block_size);
}
if (current_card < jump_to_card) continue;
HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index);
#if 0
gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n",
......@@ -247,22 +249,14 @@ public:
// If the card is dirty, then we will scan it during updateRS.
if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
if (!_ct_bs->is_card_claimed(card_index) && _ct_bs->claim_card(card_index)) {
scanCard(card_index, card_region);
} else if (_try_claimed) {
if (jump_to_card == 0 || jump_to_card != current_card) {
// We did some useful work in the previous iteration.
// Decrease the distance.
skip_distance = MAX2(skip_distance >> 1, _min_skip_distance);
} else {
// Previous iteration resulted in a claim failure.
// Increase the distance.
skip_distance = MIN2(skip_distance << 1, _max_skip_distance);
}
jump_to_card = current_card + skip_distance;
}
// We make the card as "claimed" lazily (so races are possible but they're benign),
// which reduces the number of duplicate scans (the rsets of the regions in the cset
// can intersect).
if (!_ct_bs->is_card_claimed(card_index)) {
_ct_bs->set_card_claimed(card_index);
scanCard(card_index, card_region);
}
}
++current_card;
}
if (!_try_claimed) {
hrrs->set_iter_complete();
......@@ -299,30 +293,18 @@ void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
double rs_time_start = os::elapsedTime();
HeapRegion *startRegion = calculateStartRegion(worker_i);
BufferingOopsInHeapRegionClosure boc(oc);
ScanRSClosure scanRScl(&boc, worker_i);
ScanRSClosure scanRScl(oc, worker_i);
_g1->collection_set_iterate_from(startRegion, &scanRScl);
scanRScl.set_try_claimed();
_g1->collection_set_iterate_from(startRegion, &scanRScl);
boc.done();
double closure_app_time_sec = boc.closure_app_seconds();
double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
closure_app_time_sec;
double closure_app_time_ms = closure_app_time_sec * 1000.0;
double scan_rs_time_sec = os::elapsedTime() - rs_time_start;
assert( _cards_scanned != NULL, "invariant" );
_cards_scanned[worker_i] = scanRScl.cards_done();
_g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0);
_g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i);
if (scan_new_refs_time_ms > 0.0) {
closure_app_time_ms += scan_new_refs_time_ms;
}
_g1p->record_obj_copy_time(worker_i, closure_app_time_ms);
}
void HRInto_G1RemSet::updateRS(int worker_i) {
......@@ -449,9 +431,8 @@ HRInto_G1RemSet::scanNewRefsRS_work(OopsInHeapRegionClosure* oc,
oc->do_oop(p);
}
}
_g1p->record_scan_new_refs_time(worker_i,
(os::elapsedTime() - scan_new_refs_start_sec)
* 1000.0);
double scan_new_refs_time_ms = (os::elapsedTime() - scan_new_refs_start_sec) * 1000.0;
_g1p->record_scan_new_refs_time(worker_i, scan_new_refs_time_ms);
}
void HRInto_G1RemSet::cleanupHRRS() {
......
......@@ -207,8 +207,20 @@
develop(bool, G1PrintOopAppls, false, \
"When true, print applications of closures to external locs.") \
\
develop(intx, G1LogRSRegionEntries, 7, \
"Log_2 of max number of regions for which we keep bitmaps.") \
develop(intx, G1RSetRegionEntriesBase, 256, \
"Max number of regions in a fine-grain table per MB.") \
\
product(intx, G1RSetRegionEntries, 0, \
"Max number of regions for which we keep bitmaps." \
"Will be set ergonomically by default") \
\
develop(intx, G1RSetSparseRegionEntriesBase, 4, \
"Max number of entries per region in a sparse table " \
"per MB.") \
\
product(intx, G1RSetSparseRegionEntries, 0, \
"Max number of entries per region in a sparse table." \
"Will be set ergonomically by default.") \
\
develop(bool, G1RecordHRRSOops, false, \
"When true, record recent calls to rem set operations.") \
......@@ -293,6 +305,10 @@
develop(bool, G1VerifyCTCleanup, false, \
"Verify card table cleanup.") \
\
product(uintx, G1RSetScanBlockSize, 64, \
"Size of a work unit of cards claimed by a worker thread" \
"during RSet scanning.") \
\
develop(bool, ReduceInitialCardMarksForG1, false, \
"When ReduceInitialCardMarks is true, this flag setting " \
" controls whether G1 allows the RICM optimization")
......
......@@ -33,11 +33,12 @@ enum G1Barrier {
};
template<bool do_gen_barrier, G1Barrier barrier,
bool do_mark_forwardee, bool skip_cset_test>
bool do_mark_forwardee>
class G1ParCopyClosure;
class G1ParScanClosure;
class G1ParPushHeapRSClosure;
typedef G1ParCopyClosure<false, G1BarrierEvac, false, true> G1ParScanHeapEvacClosure;
typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
class FilterIntoCSClosure;
class FilterOutOfRegionClosure;
......@@ -51,6 +52,7 @@ class FilterAndMarkInHeapRegionAndIntoCSClosure;
#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \
f(G1ParScanHeapEvacClosure,_nv) \
f(G1ParScanClosure,_nv) \
f(G1ParPushHeapRSClosure,_nv) \
f(FilterIntoCSClosure,_nv) \
f(FilterOutOfRegionClosure,_nv) \
f(FilterInHeapRegionAndIntoCSClosure,_nv) \
......
......@@ -505,12 +505,13 @@ OtherRegionsTable::OtherRegionsTable(HeapRegion* hr) :
typedef PosParPRT* PosParPRTPtr;
if (_max_fine_entries == 0) {
assert(_mod_max_fine_entries_mask == 0, "Both or none.");
_max_fine_entries = (size_t)(1 << G1LogRSRegionEntries);
size_t max_entries_log = (size_t)log2_long((jlong)G1RSetRegionEntries);
_max_fine_entries = (size_t)(1 << max_entries_log);
_mod_max_fine_entries_mask = _max_fine_entries - 1;
#if SAMPLE_FOR_EVICTION
assert(_fine_eviction_sample_size == 0
&& _fine_eviction_stride == 0, "All init at same time.");
_fine_eviction_sample_size = MAX2((size_t)4, (size_t)G1LogRSRegionEntries);
_fine_eviction_sample_size = MAX2((size_t)4, max_entries_log);
_fine_eviction_stride = _max_fine_entries / _fine_eviction_sample_size;
#endif
}
......@@ -655,13 +656,6 @@ void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, int tid) {
#endif
}
// Otherwise, transfer from sparse to fine-grain.
CardIdx_t cards[SparsePRTEntry::CardsPerEntry];
if (G1HRRSUseSparseTable) {
bool res = _sparse_table.get_cards(from_hrs_ind, &cards[0]);
assert(res, "There should have been an entry");
}
if (_n_fine_entries == _max_fine_entries) {
prt = delete_region_table();
} else {
......@@ -676,10 +670,12 @@ void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, int tid) {
_fine_grain_regions[ind] = prt;
_n_fine_entries++;
// Add in the cards from the sparse table.
if (G1HRRSUseSparseTable) {
for (int i = 0; i < SparsePRTEntry::CardsPerEntry; i++) {
CardIdx_t c = cards[i];
// Transfer from sparse to fine-grain.
SparsePRTEntry *sprt_entry = _sparse_table.get_entry(from_hrs_ind);
assert(sprt_entry != NULL, "There should have been an entry");
for (int i = 0; i < SparsePRTEntry::cards_num(); i++) {
CardIdx_t c = sprt_entry->card(i);
if (c != SparsePRTEntry::NullEntry) {
prt->add_card(c);
}
......@@ -1084,6 +1080,19 @@ HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
{}
void HeapRegionRemSet::setup_remset_size() {
// Setup sparse and fine-grain tables sizes.
// table_size = base * (log(region_size / 1M) + 1)
int region_size_log_mb = MAX2((int)HeapRegion::LogOfHRGrainBytes - (int)LOG_M, 0);
if (FLAG_IS_DEFAULT(G1RSetSparseRegionEntries)) {
G1RSetSparseRegionEntries = G1RSetSparseRegionEntriesBase * (region_size_log_mb + 1);
}
if (FLAG_IS_DEFAULT(G1RSetRegionEntries)) {
G1RSetRegionEntries = G1RSetRegionEntriesBase * (region_size_log_mb + 1);
}
guarantee(G1RSetSparseRegionEntries > 0 && G1RSetRegionEntries > 0 , "Sanity");
}
void HeapRegionRemSet::init_for_par_iteration() {
_iter_state = Unclaimed;
}
......@@ -1399,7 +1408,7 @@ void HeapRegionRemSet::test() {
os::sleep(Thread::current(), (jlong)5000, false);
G1CollectedHeap* g1h = G1CollectedHeap::heap();
// Run with "-XX:G1LogRSRegionEntries=2", so that 1 and 5 end up in same
// Run with "-XX:G1LogRSetRegionEntries=2", so that 1 and 5 end up in same
// hash bucket.
HeapRegion* hr0 = g1h->region_at(0);
HeapRegion* hr1 = g1h->region_at(1);
......
......@@ -187,7 +187,8 @@ private:
void clear_outgoing_entries();
enum ParIterState { Unclaimed, Claimed, Complete };
ParIterState _iter_state;
volatile ParIterState _iter_state;
volatile jlong _iter_claimed;
// Unused unless G1RecordHRRSOops is true.
......@@ -209,6 +210,7 @@ public:
HeapRegion* hr);
static int num_par_rem_sets();
static void setup_remset_size();
HeapRegion* hr() const {
return _other_regions.hr();
......@@ -272,6 +274,19 @@ public:
// Returns "true" iff the region's iteration is complete.
bool iter_is_complete();
// Support for claiming blocks of cards during iteration
void set_iter_claimed(size_t x) { _iter_claimed = (jlong)x; }
size_t iter_claimed() const { return (size_t)_iter_claimed; }
// Claim the next block of cards
size_t iter_claimed_next(size_t step) {
size_t current, next;
do {
current = iter_claimed();
next = current + step;
} while (Atomic::cmpxchg((jlong)next, &_iter_claimed, (jlong)current) != (jlong)current);
return current;
}
// Initialize the given iterator to iterate over this rem set.
void init_iterator(HeapRegionRemSetIterator* iter) const;
......
......@@ -27,7 +27,7 @@
#define SPARSE_PRT_VERBOSE 0
#define UNROLL_CARD_LOOPS 1
#define UNROLL_CARD_LOOPS 1
void SparsePRT::init_iterator(SparsePRTIter* sprt_iter) {
sprt_iter->init(this);
......@@ -36,27 +36,32 @@ void SparsePRT::init_iterator(SparsePRTIter* sprt_iter) {
void SparsePRTEntry::init(RegionIdx_t region_ind) {
_region_ind = region_ind;
_next_index = NullEntry;
#if UNROLL_CARD_LOOPS
assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
_cards[0] = NullEntry;
_cards[1] = NullEntry;
_cards[2] = NullEntry;
_cards[3] = NullEntry;
assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry");
for (int i = 0; i < cards_num(); i += UnrollFactor) {
_cards[i] = NullEntry;
_cards[i + 1] = NullEntry;
_cards[i + 2] = NullEntry;
_cards[i + 3] = NullEntry;
}
#else
for (int i = 0; i < CardsPerEntry; i++)
for (int i = 0; i < cards_num(); i++)
_cards[i] = NullEntry;
#endif
}
bool SparsePRTEntry::contains_card(CardIdx_t card_index) const {
#if UNROLL_CARD_LOOPS
assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
if (_cards[0] == card_index) return true;
if (_cards[1] == card_index) return true;
if (_cards[2] == card_index) return true;
if (_cards[3] == card_index) return true;
assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry");
for (int i = 0; i < cards_num(); i += UnrollFactor) {
if (_cards[i] == card_index ||
_cards[i + 1] == card_index ||
_cards[i + 2] == card_index ||
_cards[i + 3] == card_index) return true;
}
#else
for (int i = 0; i < CardsPerEntry; i++) {
for (int i = 0; i < cards_num(); i++) {
if (_cards[i] == card_index) return true;
}
#endif
......@@ -67,14 +72,16 @@ bool SparsePRTEntry::contains_card(CardIdx_t card_index) const {
int SparsePRTEntry::num_valid_cards() const {
int sum = 0;
#if UNROLL_CARD_LOOPS
assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
if (_cards[0] != NullEntry) sum++;
if (_cards[1] != NullEntry) sum++;
if (_cards[2] != NullEntry) sum++;
if (_cards[3] != NullEntry) sum++;
assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry");
for (int i = 0; i < cards_num(); i += UnrollFactor) {
sum += (_cards[i] != NullEntry);
sum += (_cards[i + 1] != NullEntry);
sum += (_cards[i + 2] != NullEntry);
sum += (_cards[i + 3] != NullEntry);
}
#else
for (int i = 0; i < CardsPerEntry; i++) {
if (_cards[i] != NulLEntry) sum++;
for (int i = 0; i < cards_num(); i++) {
sum += (_cards[i] != NullEntry);
}
#endif
// Otherwise, we're full.
......@@ -83,27 +90,27 @@ int SparsePRTEntry::num_valid_cards() const {
SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(CardIdx_t card_index) {
#if UNROLL_CARD_LOOPS
assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
CardIdx_t c = _cards[0];
if (c == card_index) return found;
if (c == NullEntry) { _cards[0] = card_index; return added; }
c = _cards[1];
if (c == card_index) return found;
if (c == NullEntry) { _cards[1] = card_index; return added; }
c = _cards[2];
if (c == card_index) return found;
if (c == NullEntry) { _cards[2] = card_index; return added; }
c = _cards[3];
if (c == card_index) return found;
if (c == NullEntry) { _cards[3] = card_index; return added; }
assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry");
CardIdx_t c;
for (int i = 0; i < cards_num(); i += UnrollFactor) {
c = _cards[i];
if (c == card_index) return found;
if (c == NullEntry) { _cards[i] = card_index; return added; }
c = _cards[i + 1];
if (c == card_index) return found;
if (c == NullEntry) { _cards[i + 1] = card_index; return added; }
c = _cards[i + 2];
if (c == card_index) return found;
if (c == NullEntry) { _cards[i + 2] = card_index; return added; }
c = _cards[i + 3];
if (c == card_index) return found;
if (c == NullEntry) { _cards[i + 3] = card_index; return added; }
}
#else
for (int i = 0; i < CardsPerEntry; i++) {
for (int i = 0; i < cards_num(); i++) {
CardIdx_t c = _cards[i];
if (c == card_index) return found;
if (c == NullEntry) {
_cards[i] = card_index;
return added;
}
if (c == NullEntry) { _cards[i] = card_index; return added; }
}
#endif
// Otherwise, we're full.
......@@ -112,13 +119,15 @@ SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(CardIdx_t card_index) {
void SparsePRTEntry::copy_cards(CardIdx_t* cards) const {
#if UNROLL_CARD_LOOPS
assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
cards[0] = _cards[0];
cards[1] = _cards[1];
cards[2] = _cards[2];
cards[3] = _cards[3];
assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry");
for (int i = 0; i < cards_num(); i += UnrollFactor) {
cards[i] = _cards[i];
cards[i + 1] = _cards[i + 1];
cards[i + 2] = _cards[i + 2];
cards[i + 3] = _cards[i + 3];
}
#else
for (int i = 0; i < CardsPerEntry; i++) {
for (int i = 0; i < cards_num(); i++) {
cards[i] = _cards[i];
}
#endif
......@@ -133,7 +142,7 @@ void SparsePRTEntry::copy_cards(SparsePRTEntry* e) const {
RSHashTable::RSHashTable(size_t capacity) :
_capacity(capacity), _capacity_mask(capacity-1),
_occupied_entries(0), _occupied_cards(0),
_entries(NEW_C_HEAP_ARRAY(SparsePRTEntry, capacity)),
_entries((SparsePRTEntry*)NEW_C_HEAP_ARRAY(char, SparsePRTEntry::size() * capacity)),
_buckets(NEW_C_HEAP_ARRAY(int, capacity)),
_free_list(NullEntry), _free_region(0)
{
......@@ -161,8 +170,8 @@ void RSHashTable::clear() {
"_capacity too large");
// This will put -1 == NullEntry in the key field of all entries.
memset(_entries, -1, _capacity * sizeof(SparsePRTEntry));
memset(_buckets, -1, _capacity * sizeof(int));
memset(_entries, NullEntry, _capacity * SparsePRTEntry::size());
memset(_buckets, NullEntry, _capacity * sizeof(int));
_free_list = NullEntry;
_free_region = 0;
}
......@@ -175,8 +184,8 @@ bool RSHashTable::add_card(RegionIdx_t region_ind, CardIdx_t card_index) {
if (res == SparsePRTEntry::added) _occupied_cards++;
#if SPARSE_PRT_VERBOSE
gclog_or_tty->print_cr(" after add_card[%d]: valid-cards = %d.",
pointer_delta(e, _entries, sizeof(SparsePRTEntry)),
e->num_valid_cards());
pointer_delta(e, _entries, SparsePRTEntry::size()),
e->num_valid_cards());
#endif
assert(e->num_valid_cards() > 0, "Postcondition");
return res != SparsePRTEntry::overflow;
......@@ -199,6 +208,22 @@ bool RSHashTable::get_cards(RegionIdx_t region_ind, CardIdx_t* cards) {
return true;
}
SparsePRTEntry* RSHashTable::get_entry(RegionIdx_t region_ind) {
int ind = (int) (region_ind & capacity_mask());
int cur_ind = _buckets[ind];
SparsePRTEntry* cur;
while (cur_ind != NullEntry &&
(cur = entry(cur_ind))->r_ind() != region_ind) {
cur_ind = cur->next_index();
}
if (cur_ind == NullEntry) return NULL;
// Otherwise...
assert(cur->r_ind() == region_ind, "Postcondition of loop + test above.");
assert(cur->num_valid_cards() > 0, "Inv");
return cur;
}
bool RSHashTable::delete_entry(RegionIdx_t region_ind) {
int ind = (int) (region_ind & capacity_mask());
int* prev_loc = &_buckets[ind];
......@@ -225,20 +250,8 @@ RSHashTable::entry_for_region_ind(RegionIdx_t region_ind) const {
int ind = (int) (region_ind & capacity_mask());
int cur_ind = _buckets[ind];
SparsePRTEntry* cur;
// XXX
// int k = 0;
while (cur_ind != NullEntry &&
(cur = entry(cur_ind))->r_ind() != region_ind) {
/*
k++;
if (k > 10) {
gclog_or_tty->print_cr("RSHashTable::entry_for_region_ind(%d): "
"k = %d, cur_ind = %d.", region_ind, k, cur_ind);
if (k >= 1000) {
while (1) ;
}
}
*/
cur_ind = cur->next_index();
}
......@@ -319,7 +332,7 @@ size_t /* RSHashTable:: */ RSHashTableIter::compute_card_ind(CardIdx_t ci) {
bool /* RSHashTable:: */ RSHashTableIter::has_next(size_t& card_index) {
_card_ind++;
CardIdx_t ci;
if (_card_ind < SparsePRTEntry::CardsPerEntry &&
if (_card_ind < SparsePRTEntry::cards_num() &&
((ci = _rsht->entry(_bl_ind)->card(_card_ind)) !=
SparsePRTEntry::NullEntry)) {
card_index = compute_card_ind(ci);
......@@ -359,7 +372,7 @@ bool RSHashTable::contains_card(RegionIdx_t region_index, CardIdx_t card_index)
size_t RSHashTable::mem_size() const {
return sizeof(this) +
capacity() * (sizeof(SparsePRTEntry) + sizeof(int));
capacity() * (SparsePRTEntry::size() + sizeof(int));
}
// ----------------------------------------------------------------------
......@@ -446,6 +459,10 @@ bool SparsePRT::get_cards(RegionIdx_t region_id, CardIdx_t* cards) {
return _next->get_cards(region_id, cards);
}
SparsePRTEntry* SparsePRT::get_entry(RegionIdx_t region_id) {
return _next->get_entry(region_id);
}
bool SparsePRT::delete_entry(RegionIdx_t region_id) {
return _next->delete_entry(region_id);
}
......
......@@ -32,21 +32,28 @@
// insertions only enqueue old versions for deletions, but do not delete
// old versions synchronously.
class SparsePRTEntry: public CHeapObj {
public:
enum SomePublicConstants {
CardsPerEntry = 4,
NullEntry = -1
NullEntry = -1,
UnrollFactor = 4
};
private:
RegionIdx_t _region_ind;
int _next_index;
CardIdx_t _cards[CardsPerEntry];
CardIdx_t _cards[1];
// WARNING: Don't put any data members beyond this line. Card array has, in fact, variable length.
// It should always be the last data member.
public:
// Returns the size of the entry, used for entry allocation.
static size_t size() { return sizeof(SparsePRTEntry) + sizeof(CardIdx_t) * (cards_num() - 1); }
// Returns the size of the card array.
static int cards_num() {
// The number of cards should be a multiple of 4, because that's our current
// unrolling factor.
static const int s = MAX2<int>(G1RSetSparseRegionEntries & ~(UnrollFactor - 1), UnrollFactor);
return s;
}
// Set the region_ind to the given value, and delete all cards.
inline void init(RegionIdx_t region_ind);
......@@ -134,12 +141,15 @@ public:
bool add_card(RegionIdx_t region_id, CardIdx_t card_index);
bool get_cards(RegionIdx_t region_id, CardIdx_t* cards);
bool delete_entry(RegionIdx_t region_id);
bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const;
void add_entry(SparsePRTEntry* e);
SparsePRTEntry* get_entry(RegionIdx_t region_id);
void clear();
size_t capacity() const { return _capacity; }
......@@ -148,7 +158,7 @@ public:
size_t occupied_cards() const { return _occupied_cards; }
size_t mem_size() const;
SparsePRTEntry* entry(int i) const { return &_entries[i]; }
SparsePRTEntry* entry(int i) const { return (SparsePRTEntry*)((char*)_entries + SparsePRTEntry::size() * i); }
void print();
};
......@@ -157,7 +167,7 @@ public:
class RSHashTableIter VALUE_OBJ_CLASS_SPEC {
int _tbl_ind; // [-1, 0.._rsht->_capacity)
int _bl_ind; // [-1, 0.._rsht->_capacity)
short _card_ind; // [0..CardsPerEntry)
short _card_ind; // [0..SparsePRTEntry::cards_num())
RSHashTable* _rsht;
size_t _heap_bot_card_ind;
......@@ -176,7 +186,7 @@ public:
RSHashTableIter(size_t heap_bot_card_ind) :
_tbl_ind(RSHashTable::NullEntry),
_bl_ind(RSHashTable::NullEntry),
_card_ind((SparsePRTEntry::CardsPerEntry-1)),
_card_ind((SparsePRTEntry::cards_num() - 1)),
_rsht(NULL),
_heap_bot_card_ind(heap_bot_card_ind)
{}
......@@ -185,7 +195,7 @@ public:
_rsht = rsht;
_tbl_ind = -1; // So that first increment gets to 0.
_bl_ind = RSHashTable::NullEntry;
_card_ind = (SparsePRTEntry::CardsPerEntry-1);
_card_ind = (SparsePRTEntry::cards_num() - 1);
}
bool has_next(size_t& card_index);
......@@ -241,9 +251,13 @@ public:
// If the table hold an entry for "region_ind", Copies its
// cards into "cards", which must be an array of length at least
// "CardsPerEntry", and returns "true"; otherwise, returns "false".
// "SparePRTEntry::cards_num()", and returns "true"; otherwise,
// returns "false".
bool get_cards(RegionIdx_t region_ind, CardIdx_t* cards);
// Return the pointer to the entry associated with the given region.
SparsePRTEntry* get_entry(RegionIdx_t region_ind);
// If there is an entry for "region_ind", removes it and return "true";
// otherwise returns "false."
bool delete_entry(RegionIdx_t region_ind);
......
......@@ -339,6 +339,16 @@ public:
return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val();
}
void set_card_claimed(size_t card_index) {
jbyte val = _byte_map[card_index];
if (val == clean_card_val()) {
val = (jbyte)claimed_card_val();
} else {
val |= (jbyte)claimed_card_val();
}
_byte_map[card_index] = val;
}
bool claim_card(size_t card_index);
bool is_card_clean(size_t card_index) {
......
......@@ -139,6 +139,10 @@ const size_t M = K*K;
const size_t G = M*K;
const size_t HWperKB = K / sizeof(HeapWord);
const size_t LOG_K = 10;
const size_t LOG_M = 2 * LOG_K;
const size_t LOG_G = 2 * LOG_M;
const jint min_jint = (jint)1 << (sizeof(jint)*BitsPerByte-1); // 0x80000000 == smallest jint
const jint max_jint = (juint)min_jint - 1; // 0x7FFFFFFF == largest jint
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册