提交 2c49aee0 编写于 作者: I iveresov

6484957: G1: parallel concurrent refinement

6826318: G1: remove traversal-based refinement code
Summary: Removed traversal-based refinement code as it's no longer used. Made the concurrent refinement (queue-based) parallel.
Reviewed-by: tonyp
上级 7a27a00c
...@@ -4454,43 +4454,26 @@ void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val ...@@ -4454,43 +4454,26 @@ void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val
delayed()->nop(); delayed()->nop();
} }
// Now we decide how to generate the card table write. If we're // If the "store_addr" register is an "in" or "local" register, move it to
// enqueueing, we call out to a generated function. Otherwise, we do it // a scratch reg so we can pass it as an argument.
// inline here. bool use_scr = !(store_addr->is_global() || store_addr->is_out());
// Pick a scratch register different from "tmp".
if (G1RSBarrierUseQueue) { Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
// If the "store_addr" register is an "in" or "local" register, move it to // Make sure we use up the delay slot!
// a scratch reg so we can pass it as an argument. if (use_scr) {
bool use_scr = !(store_addr->is_global() || store_addr->is_out()); post_filter_masm->mov(store_addr, scr);
// Pick a scratch register different from "tmp".
Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
// Make sure we use up the delay slot!
if (use_scr) {
post_filter_masm->mov(store_addr, scr);
} else {
post_filter_masm->nop();
}
generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
save_frame(0);
call(dirty_card_log_enqueue);
if (use_scr) {
delayed()->mov(scr, O0);
} else {
delayed()->mov(store_addr->after_save(), O0);
}
restore();
} else { } else {
post_filter_masm->nop();
#ifdef _LP64
post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr);
#else
post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr);
#endif
assert(tmp != store_addr, "need separate temp reg");
set(bs->byte_map_base, tmp);
stb(G0, tmp, store_addr);
} }
generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
save_frame(0);
call(dirty_card_log_enqueue);
if (use_scr) {
delayed()->mov(scr, O0);
} else {
delayed()->mov(store_addr->after_save(), O0);
}
restore();
bind(filtered); bind(filtered);
......
...@@ -25,23 +25,26 @@ ...@@ -25,23 +25,26 @@
#include "incls/_precompiled.incl" #include "incls/_precompiled.incl"
#include "incls/_concurrentG1Refine.cpp.incl" #include "incls/_concurrentG1Refine.cpp.incl"
bool ConcurrentG1Refine::_enabled = false;
ConcurrentG1Refine::ConcurrentG1Refine() : ConcurrentG1Refine::ConcurrentG1Refine() :
_pya(PYA_continue), _last_pya(PYA_continue),
_last_cards_during(), _first_traversal(false),
_card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL), _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL),
_hot_cache(NULL), _hot_cache(NULL),
_def_use_cache(false), _use_cache(false), _def_use_cache(false), _use_cache(false),
_n_periods(0), _total_cards(0), _total_travs(0) _n_periods(0), _total_cards(0), _total_travs(0),
_threads(NULL), _n_threads(0)
{ {
if (G1ConcRefine) { if (G1ConcRefine) {
_cg1rThread = new ConcurrentG1RefineThread(this); _n_threads = (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads;
assert(cg1rThread() != NULL, "Conc refine should have been created"); if (_n_threads > 0) {
assert(cg1rThread()->cg1r() == this, _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
"Conc refine thread should refer to this"); ConcurrentG1RefineThread *next = NULL;
} else { for (int i = _n_threads - 1; i >= 0; i--) {
_cg1rThread = NULL; ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, i);
assert(t != NULL, "Conc refine should have been created");
assert(t->cg1r() == this, "Conc refine thread should refer to this");
_threads[i] = t;
next = t;
}
}
} }
} }
...@@ -75,6 +78,14 @@ void ConcurrentG1Refine::init() { ...@@ -75,6 +78,14 @@ void ConcurrentG1Refine::init() {
} }
} }
void ConcurrentG1Refine::stop() {
if (_threads != NULL) {
for (int i = 0; i < _n_threads; i++) {
_threads[i]->stop();
}
}
}
ConcurrentG1Refine::~ConcurrentG1Refine() { ConcurrentG1Refine::~ConcurrentG1Refine() {
if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
assert(_card_counts != NULL, "Logic"); assert(_card_counts != NULL, "Logic");
...@@ -88,104 +99,22 @@ ConcurrentG1Refine::~ConcurrentG1Refine() { ...@@ -88,104 +99,22 @@ ConcurrentG1Refine::~ConcurrentG1Refine() {
assert(_hot_cache != NULL, "Logic"); assert(_hot_cache != NULL, "Logic");
FREE_C_HEAP_ARRAY(jbyte*, _hot_cache); FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
} }
} if (_threads != NULL) {
for (int i = 0; i < _n_threads; i++) {
bool ConcurrentG1Refine::refine() { delete _threads[i];
G1CollectedHeap* g1h = G1CollectedHeap::heap(); }
unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards(); FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
clear_hot_cache(); // Any previous values in this are now invalid.
g1h->g1_rem_set()->concurrentRefinementPass(this);
_traversals++;
unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards();
unsigned cards_during = cards_after-cards_before;
// If this is the first traversal in the current enabling
// and we did some cards, or if the number of cards found is decreasing
// sufficiently quickly, then keep going. Otherwise, sleep a while.
bool res =
(_first_traversal && cards_during > 0)
||
(!_first_traversal && cards_during * 3 < _last_cards_during * 2);
_last_cards_during = cards_during;
_first_traversal = false;
return res;
}
// Turns concurrent refinement on if it is currently off. While holding
// G1ConcRefine_mon it sets _enabled, re-arms the first-traversal state
// (_first_traversal = true, _last_cards_during = 0) and wakes every waiter
// on the monitor. A call while already enabled changes nothing.
void ConcurrentG1Refine::enable() {
  MutexLocker ml(G1ConcRefine_mon);
  if (_enabled) {
    return;
  }
  _enabled = true;
  _first_traversal = true;
  _last_cards_during = 0;
  G1ConcRefine_mon->notify_all();
}
// Turns concurrent refinement off while holding G1ConcRefine_mon.
// Returns the current value of _traversals if refinement was enabled,
// or 0 if it was already disabled.
unsigned ConcurrentG1Refine::disable() {
  MutexLocker ml(G1ConcRefine_mon);
  if (!_enabled) {
    return 0;
  }
  _enabled = false;
  return _traversals;
}
// Blocks the caller until _enabled is true, waiting on G1ConcRefine_mon
// without a safepoint check, and then resets the traversal counter to 0.
void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() {
G1ConcRefine_mon->lock();
// Re-check the flag after every wake-up instead of waiting only once.
while (!_enabled) {
G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag);
}
G1ConcRefine_mon->unlock();
// NOTE(review): _traversals is reset after the monitor has been released,
// whereas disable() reads it with the monitor held -- confirm this is intended.
_traversals = 0;
};
void ConcurrentG1Refine::set_pya_restart() {
// If we're using the log-based RS barrier, the above will cause
// in-progress traversals of completed log buffers to quit early; we will
// also abandon all other buffers.
if (G1RSBarrierUseQueue) {
DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
dcqs.abandon_logs();
// Reset the post-yield actions.
_pya = PYA_continue;
_last_pya = PYA_continue;
} else {
_pya = PYA_restart;
} }
} }
void ConcurrentG1Refine::set_pya_cancel() { void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
_pya = PYA_cancel; if (_threads != NULL) {
} for (int i = 0; i < _n_threads; i++) {
tc->do_thread(_threads[i]);
PostYieldAction ConcurrentG1Refine::get_pya() {
if (_pya != PYA_continue) {
jint val = _pya;
while (true) {
jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val);
if (val_read == val) {
PostYieldAction res = (PostYieldAction)val;
assert(res != PYA_continue, "Only the refine thread should reset.");
_last_pya = res;
return res;
} else {
val = val_read;
}
} }
} }
// QQQ WELL WHAT DO WE RETURN HERE???
// make up something!
return PYA_continue;
} }
// Returns the value currently stored in _last_pya and resets the stored
// value to PYA_continue, so a second call without an intervening update
// yields PYA_continue.
PostYieldAction ConcurrentG1Refine::get_last_pya() {
  const PostYieldAction recorded = _last_pya;
  _last_pya = PYA_continue;
  return recorded;
}
// Forwards to the refinement thread's do_traversal() and returns its result.
// NOTE(review): _cg1rThread is dereferenced without a NULL check; the
// constructor leaves it NULL when G1ConcRefine is off -- presumably callers
// only use this with refinement on; confirm.
bool ConcurrentG1Refine::do_traversal() {
  const bool traversal_requested = _cg1rThread->do_traversal();
  return traversal_requested;
}
int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) { int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) {
size_t card_num = (card_ptr - _ct_bot); size_t card_num = (card_ptr - _ct_bot);
......
...@@ -26,25 +26,9 @@ ...@@ -26,25 +26,9 @@
class ConcurrentG1RefineThread; class ConcurrentG1RefineThread;
class G1RemSet; class G1RemSet;
// What the refinement code should do after a yield.
// The enumerators use the default numbering, so PYA_continue is 0; the
// current action is stored in ConcurrentG1Refine::_pya as a jint.
enum PostYieldAction {
PYA_continue, // Continue the traversal
PYA_restart, // Restart: the in-progress pass should start over
PYA_cancel // It's been completed by somebody else: cancel.
};
class ConcurrentG1Refine: public CHeapObj { class ConcurrentG1Refine: public CHeapObj {
ConcurrentG1RefineThread* _cg1rThread; ConcurrentG1RefineThread** _threads;
int _n_threads;
volatile jint _pya;
PostYieldAction _last_pya;
static bool _enabled; // Protected by G1ConcRefine_mon.
unsigned _traversals;
// Number of cards processed during last refinement traversal.
unsigned _first_traversal;
unsigned _last_cards_during;
// The cache for card refinement. // The cache for card refinement.
bool _use_cache; bool _use_cache;
...@@ -74,37 +58,10 @@ class ConcurrentG1Refine: public CHeapObj { ...@@ -74,37 +58,10 @@ class ConcurrentG1Refine: public CHeapObj {
~ConcurrentG1Refine(); ~ConcurrentG1Refine();
void init(); // Accomplish some initialization that has to wait. void init(); // Accomplish some initialization that has to wait.
void stop();
// Enabled Conc refinement, waking up thread if necessary. // Iterate over the conc refine threads
void enable(); void threads_do(ThreadClosure *tc);
// Returns the number of traversals performed since this refiner was enabled.
unsigned disable();
// Requires G1ConcRefine_mon to be held.
bool enabled() { return _enabled; }
// Returns only when G1 concurrent refinement has been enabled.
void wait_for_ConcurrentG1Refine_enabled();
// Do one concurrent refinement pass over the card table. Returns "true"
// if heuristics determine that another pass should be done immediately.
bool refine();
// Indicate that an in-progress refinement pass should start over.
void set_pya_restart();
// Indicate that an in-progress refinement pass should quit.
void set_pya_cancel();
// Get the appropriate post-yield action. Also sets last_pya.
PostYieldAction get_pya();
// The last PYA read by "get_pya".
PostYieldAction get_last_pya();
bool do_traversal();
ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; }
// If this is the first entry for the slot, writes into the cache and // If this is the first entry for the slot, writes into the cache and
// returns NULL. If it causes an eviction, returns the evicted pointer. // returns NULL. If it causes an eviction, returns the evicted pointer.
......
...@@ -30,12 +30,12 @@ ...@@ -30,12 +30,12 @@
// The CM thread is created when the G1 garbage collector is used // The CM thread is created when the G1 garbage collector is used
ConcurrentG1RefineThread:: ConcurrentG1RefineThread::
ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) : ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next, int worker_id) :
ConcurrentGCThread(), ConcurrentGCThread(),
_worker_id(worker_id),
_active(false),
_next(next),
_cg1r(cg1r), _cg1r(cg1r),
_started(false),
_in_progress(false),
_do_traversal(false),
_vtime_accum(0.0), _vtime_accum(0.0),
_co_tracker(G1CRGroup), _co_tracker(G1CRGroup),
_interval_ms(5.0) _interval_ms(5.0)
...@@ -43,66 +43,69 @@ ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) : ...@@ -43,66 +43,69 @@ ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) :
create_and_start(); create_and_start();
} }
const long timeout = 200; // ms. void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1CollectorPolicy* g1p = g1h->g1_policy();
if (g1p->adaptive_young_list_length()) {
int regions_visited = 0;
void ConcurrentG1RefineThread::traversalBasedRefinement() { g1h->young_list_rs_length_sampling_init();
_cg1r->wait_for_ConcurrentG1Refine_enabled(); while (g1h->young_list_rs_length_sampling_more()) {
MutexLocker x(G1ConcRefine_mon); g1h->young_list_rs_length_sampling_next();
while (_cg1r->enabled()) { ++regions_visited;
MutexUnlocker ux(G1ConcRefine_mon);
ResourceMark rm;
HandleMark hm;
if (G1TraceConcurrentRefinement) { // we try to yield every time we visit 10 regions
gclog_or_tty->print_cr("G1-Refine starting pass"); if (regions_visited == 10) {
} if (_sts.should_yield()) {
_sts.join(); _sts.yield("G1 refine");
bool no_sleep = _cg1r->refine(); // we just abandon the iteration
_sts.leave(); break;
if (!no_sleep) { }
MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); regions_visited = 0;
// We do this only for the timeout; we don't expect this to be signalled. }
CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout);
} }
g1p->check_prediction_validity();
} }
} }
void ConcurrentG1RefineThread::queueBasedRefinement() { void ConcurrentG1RefineThread::run() {
DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); initialize_in_thread();
// Wait for completed log buffers to exist. _vtime_start = os::elapsedVTime();
{ wait_for_universe_init();
MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
while (!_do_traversal && !dcqs.process_completed_buffers() &&
!_should_terminate) {
DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
}
}
if (_should_terminate) { _co_tracker.enable();
return; _co_tracker.start();
}
// Now we take them off (this doesn't hold locks while it applies while (!_should_terminate) {
// closures.) (If we did a full collection, then we'll do a full DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
// traversal. // Wait for completed log buffers to exist.
_sts.join(); {
if (_do_traversal) { MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
(void)_cg1r->refine(); while (((_worker_id == 0 && !dcqs.process_completed_buffers()) ||
switch (_cg1r->get_last_pya()) { (_worker_id > 0 && !is_active())) &&
case PYA_cancel: case PYA_continue: !_should_terminate) {
// Continue was caught and handled inside "refine". If it's still DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
// "continue" when we get here, we're done. }
_do_traversal = false; }
break;
case PYA_restart: if (_should_terminate) {
assert(_do_traversal, "Because of Full GC."); return;
break;
} }
} else {
// Now we take them off (this doesn't hold locks while it applies
// closures.) (If we did a full collection, then we'll do a full
// traversal.
_sts.join();
int n_logs = 0; int n_logs = 0;
int lower_limit = 0; int lower_limit = 0;
double start_vtime_sec; // only used when G1SmoothConcRefine is on double start_vtime_sec; // only used when G1SmoothConcRefine is on
int prev_buffer_num; // only used when G1SmoothConcRefine is on int prev_buffer_num; // only used when G1SmoothConcRefine is on
// This thread activation threshold
int threshold = DCQBarrierProcessCompletedThreshold * _worker_id;
// Next thread activation threshold
int next_threshold = threshold + DCQBarrierProcessCompletedThreshold;
int deactivation_threshold = MAX2<int>(threshold - DCQBarrierProcessCompletedThreshold / 2, 0);
if (G1SmoothConcRefine) { if (G1SmoothConcRefine) {
lower_limit = 0; lower_limit = 0;
...@@ -111,29 +114,49 @@ void ConcurrentG1RefineThread::queueBasedRefinement() { ...@@ -111,29 +114,49 @@ void ConcurrentG1RefineThread::queueBasedRefinement() {
} else { } else {
lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now. lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
} }
while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) { while (dcqs.apply_closure_to_completed_buffer(_worker_id, lower_limit)) {
double end_vtime_sec; double end_vtime_sec;
double elapsed_vtime_sec; double elapsed_vtime_sec;
int elapsed_vtime_ms; int elapsed_vtime_ms;
int curr_buffer_num; int curr_buffer_num = (int) dcqs.completed_buffers_num();
if (G1SmoothConcRefine) { if (G1SmoothConcRefine) {
end_vtime_sec = os::elapsedVTime(); end_vtime_sec = os::elapsedVTime();
elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0); elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
curr_buffer_num = (int) dcqs.completed_buffers_num();
if (curr_buffer_num > prev_buffer_num || if (curr_buffer_num > prev_buffer_num ||
curr_buffer_num > DCQBarrierProcessCompletedThreshold) { curr_buffer_num > next_threshold) {
decreaseInterval(elapsed_vtime_ms); decreaseInterval(elapsed_vtime_ms);
} else if (curr_buffer_num < prev_buffer_num) { } else if (curr_buffer_num < prev_buffer_num) {
increaseInterval(elapsed_vtime_ms); increaseInterval(elapsed_vtime_ms);
} }
} }
if (_worker_id == 0) {
sample_young_list_rs_lengths(); sample_young_list_rs_lengths();
} else if (curr_buffer_num < deactivation_threshold) {
// If the number of the buffer has fallen below our threshold
// we should deactivate. The predecessor will reactivate this
// thread should the number of the buffers cross the threshold again.
MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
deactivate();
if (G1TraceConcurrentRefinement) {
gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id);
}
break;
}
_co_tracker.update(false); _co_tracker.update(false);
// Check if we need to activate the next thread.
if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) {
MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
_next->activate();
DirtyCardQ_CBL_mon->notify_all();
if (G1TraceConcurrentRefinement) {
gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id);
}
}
if (G1SmoothConcRefine) { if (G1SmoothConcRefine) {
prev_buffer_num = curr_buffer_num; prev_buffer_num = curr_buffer_num;
_sts.leave(); _sts.leave();
...@@ -143,56 +166,9 @@ void ConcurrentG1RefineThread::queueBasedRefinement() { ...@@ -143,56 +166,9 @@ void ConcurrentG1RefineThread::queueBasedRefinement() {
} }
n_logs++; n_logs++;
} }
// Make sure we harvest the PYA, if any. _co_tracker.update(false);
(void)_cg1r->get_pya();
}
_sts.leave();
}
void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1CollectorPolicy* g1p = g1h->g1_policy();
if (g1p->adaptive_young_list_length()) {
int regions_visited = 0;
g1h->young_list_rs_length_sampling_init();
while (g1h->young_list_rs_length_sampling_more()) {
g1h->young_list_rs_length_sampling_next();
++regions_visited;
// we try to yield every time we visit 10 regions
if (regions_visited == 10) {
if (_sts.should_yield()) {
_sts.yield("G1 refine");
// we just abandon the iteration
break;
}
regions_visited = 0;
}
}
g1p->check_prediction_validity();
}
}
void ConcurrentG1RefineThread::run() {
initialize_in_thread();
_vtime_start = os::elapsedVTime();
wait_for_universe_init();
_co_tracker.enable();
_co_tracker.start();
while (!_should_terminate) {
// wait until started is set.
if (G1RSBarrierUseQueue) {
queueBasedRefinement();
} else {
traversalBasedRefinement();
}
_sts.join();
_co_tracker.update();
_sts.leave(); _sts.leave();
if (os::supports_vtime()) { if (os::supports_vtime()) {
_vtime_accum = (os::elapsedVTime() - _vtime_start); _vtime_accum = (os::elapsedVTime() - _vtime_start);
} else { } else {
...@@ -240,7 +216,3 @@ void ConcurrentG1RefineThread::print() { ...@@ -240,7 +216,3 @@ void ConcurrentG1RefineThread::print() {
Thread::print(); Thread::print();
gclog_or_tty->cr(); gclog_or_tty->cr();
} }
void ConcurrentG1RefineThread::set_do_traversal(bool b) {
_do_traversal = b;
}
...@@ -33,21 +33,26 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread { ...@@ -33,21 +33,26 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread {
double _vtime_start; // Initial virtual time. double _vtime_start; // Initial virtual time.
double _vtime_accum; // Initial virtual time. double _vtime_accum; // Initial virtual time.
int _worker_id;
// The refinement threads form a linked list. A predecessor can activate its successor
// when the number of rset update buffers crosses a certain threshold. A successor
// self-deactivates when the number of buffers falls below the threshold.
bool _active;
ConcurrentG1RefineThread * _next;
public: public:
virtual void run(); virtual void run();
bool is_active() { return _active; }
void activate() { _active = true; }
void deactivate() { _active = false; }
private: private:
ConcurrentG1Refine* _cg1r; ConcurrentG1Refine* _cg1r;
bool _started;
bool _in_progress;
volatile bool _restart;
COTracker _co_tracker; COTracker _co_tracker;
double _interval_ms; double _interval_ms;
bool _do_traversal;
void decreaseInterval(int processing_time_ms) { void decreaseInterval(int processing_time_ms) {
double min_interval_ms = (double) processing_time_ms; double min_interval_ms = (double) processing_time_ms;
_interval_ms = 0.8 * _interval_ms; _interval_ms = 0.8 * _interval_ms;
...@@ -63,16 +68,12 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread { ...@@ -63,16 +68,12 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread {
void sleepBeforeNextCycle(); void sleepBeforeNextCycle();
void traversalBasedRefinement();
void queueBasedRefinement();
// For use by G1CollectedHeap, which is a friend. // For use by G1CollectedHeap, which is a friend.
static SuspendibleThreadSet* sts() { return &_sts; } static SuspendibleThreadSet* sts() { return &_sts; }
public: public:
// Constructor // Constructor
ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r); ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next, int worker_id);
// Printing // Printing
void print(); void print();
...@@ -82,23 +83,11 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread { ...@@ -82,23 +83,11 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread {
ConcurrentG1Refine* cg1r() { return _cg1r; } ConcurrentG1Refine* cg1r() { return _cg1r; }
void set_started() { _started = true; }
void clear_started() { _started = false; }
bool started() { return _started; }
void set_in_progress() { _in_progress = true; }
void clear_in_progress() { _in_progress = false; }
bool in_progress() { return _in_progress; }
void set_do_traversal(bool b);
bool do_traversal() { return _do_traversal; }
void sample_young_list_rs_lengths(); void sample_young_list_rs_lengths();
// Yield for GC // Yield for GC
void yield(); void yield();
// shutdown // shutdown
static void stop(); void stop();
}; };
...@@ -80,5 +80,5 @@ class ConcurrentMarkThread: public ConcurrentGCThread { ...@@ -80,5 +80,5 @@ class ConcurrentMarkThread: public ConcurrentGCThread {
void yield(); void yield();
// shutdown // shutdown
static void stop(); void stop();
}; };
...@@ -73,7 +73,7 @@ class ConcurrentZFThread: public ConcurrentGCThread { ...@@ -73,7 +73,7 @@ class ConcurrentZFThread: public ConcurrentGCThread {
// while holding the ZF_needed_mon lock. // while holding the ZF_needed_mon lock.
// shutdown // shutdown
static void stop(); void stop();
// Stats // Stats
static void note_region_alloc() {_region_allocs++; } static void note_region_alloc() {_region_allocs++; }
......
...@@ -234,7 +234,7 @@ bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i, ...@@ -234,7 +234,7 @@ bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i,
nd = get_completed_buffer_lock(stop_at); nd = get_completed_buffer_lock(stop_at);
} }
bool res = apply_closure_to_completed_buffer_helper(worker_i, nd); bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
if (res) _processed_buffers_rs_thread++; if (res) Atomic::inc(&_processed_buffers_rs_thread);
return res; return res;
} }
......
...@@ -447,7 +447,7 @@ void YoungList::print() { ...@@ -447,7 +447,7 @@ void YoungList::print() {
} }
void G1CollectedHeap::stop_conc_gc_threads() { void G1CollectedHeap::stop_conc_gc_threads() {
_cg1r->cg1rThread()->stop(); _cg1r->stop();
_czft->stop(); _czft->stop();
_cmThread->stop(); _cmThread->stop();
} }
...@@ -1001,12 +1001,8 @@ void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs, ...@@ -1001,12 +1001,8 @@ void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs,
gc_epilogue(true); gc_epilogue(true);
// Abandon concurrent refinement. This must happen last: in the // Discard all rset updates
// dirty-card logging system, some cards may be dirty by weak-ref JavaThread::dirty_card_queue_set().abandon_logs();
// processing, and may be enqueued. But the whole card table is
// dirtied, so this should abandon those logs, and set "do_traversal"
// to true.
concurrent_g1_refine()->set_pya_restart();
assert(!G1DeferredRSUpdate assert(!G1DeferredRSUpdate
|| (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any"); || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any");
assert(regions_accounted_for(), "Region leakage!"); assert(regions_accounted_for(), "Region leakage!");
...@@ -1521,12 +1517,12 @@ jint G1CollectedHeap::initialize() { ...@@ -1521,12 +1517,12 @@ jint G1CollectedHeap::initialize() {
SATB_Q_FL_lock, SATB_Q_FL_lock,
0, 0,
Shared_SATB_Q_lock); Shared_SATB_Q_lock);
if (G1RSBarrierUseQueue) {
JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
DirtyCardQ_FL_lock, DirtyCardQ_FL_lock,
G1DirtyCardQueueMax, G1DirtyCardQueueMax,
Shared_DirtyCardQ_lock); Shared_DirtyCardQ_lock);
}
if (G1DeferredRSUpdate) { if (G1DeferredRSUpdate) {
dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
DirtyCardQ_FL_lock, DirtyCardQ_FL_lock,
...@@ -2249,6 +2245,15 @@ void G1CollectedHeap::print_on(outputStream* st) const { ...@@ -2249,6 +2245,15 @@ void G1CollectedHeap::print_on(outputStream* st) const {
_hrs->iterate(&blk); _hrs->iterate(&blk);
} }
class PrintOnThreadsClosure : public ThreadClosure {
outputStream* _st;
public:
PrintOnThreadsClosure(outputStream* st) : _st(st) { }
virtual void do_thread(Thread *t) {
t->print_on(_st);
}
};
void G1CollectedHeap::print_gc_threads_on(outputStream* st) const { void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
if (ParallelGCThreads > 0) { if (ParallelGCThreads > 0) {
workers()->print_worker_threads(); workers()->print_worker_threads();
...@@ -2256,8 +2261,9 @@ void G1CollectedHeap::print_gc_threads_on(outputStream* st) const { ...@@ -2256,8 +2261,9 @@ void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
st->print("\"G1 concurrent mark GC Thread\" "); st->print("\"G1 concurrent mark GC Thread\" ");
_cmThread->print(); _cmThread->print();
st->cr(); st->cr();
st->print("\"G1 concurrent refinement GC Thread\" "); st->print("\"G1 concurrent refinement GC Threads\" ");
_cg1r->cg1rThread()->print_on(st); PrintOnThreadsClosure p(st);
_cg1r->threads_do(&p);
st->cr(); st->cr();
st->print("\"G1 zero-fill GC Thread\" "); st->print("\"G1 zero-fill GC Thread\" ");
_czft->print_on(st); _czft->print_on(st);
...@@ -2269,7 +2275,7 @@ void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const { ...@@ -2269,7 +2275,7 @@ void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const {
workers()->threads_do(tc); workers()->threads_do(tc);
} }
tc->do_thread(_cmThread); tc->do_thread(_cmThread);
tc->do_thread(_cg1r->cg1rThread()); _cg1r->threads_do(tc);
tc->do_thread(_czft); tc->do_thread(_czft);
} }
......
...@@ -167,11 +167,6 @@ G1CollectorPolicy::G1CollectorPolicy() : ...@@ -167,11 +167,6 @@ G1CollectorPolicy::G1CollectorPolicy() :
_all_full_gc_times_ms(new NumberSeq()), _all_full_gc_times_ms(new NumberSeq()),
_conc_refine_enabled(0),
_conc_refine_zero_traversals(0),
_conc_refine_max_traversals(0),
_conc_refine_current_delta(G1ConcRefineInitialDelta),
// G1PausesBtwnConcMark defaults to -1 // G1PausesBtwnConcMark defaults to -1
// so the hack is to do the cast QQQ FIXME // so the hack is to do the cast QQQ FIXME
_pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark), _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark),
...@@ -1634,9 +1629,8 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) { ...@@ -1634,9 +1629,8 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) {
print_stats(1, "Parallel Time", _cur_collection_par_time_ms); print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false); print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false);
print_par_stats(2, "Update RS", _par_last_update_rs_times_ms); print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
if (G1RSBarrierUseQueue) print_par_buffers(3, "Processed Buffers",
print_par_buffers(3, "Processed Buffers", _par_last_update_rs_processed_buffers, true);
_par_last_update_rs_processed_buffers, true);
print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms); print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms); print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms); print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms);
...@@ -1649,9 +1643,8 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) { ...@@ -1649,9 +1643,8 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) {
print_stats(1, "Clear CT", _cur_clear_ct_time_ms); print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
} else { } else {
print_stats(1, "Update RS", update_rs_time); print_stats(1, "Update RS", update_rs_time);
if (G1RSBarrierUseQueue) print_stats(2, "Processed Buffers",
print_stats(2, "Processed Buffers", (int)update_rs_processed_buffers);
(int)update_rs_processed_buffers);
print_stats(1, "Ext Root Scanning", ext_root_scan_time); print_stats(1, "Ext Root Scanning", ext_root_scan_time);
print_stats(1, "Mark Stack Scanning", mark_stack_scan_time); print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
print_stats(1, "Scan-Only Scanning", scan_only_time); print_stats(1, "Scan-Only Scanning", scan_only_time);
...@@ -2467,18 +2460,6 @@ void G1CollectorPolicy::print_tracing_info() const { ...@@ -2467,18 +2460,6 @@ void G1CollectorPolicy::print_tracing_info() const {
(double) _region_num_young / (double) all_region_num * 100.0, (double) _region_num_young / (double) all_region_num * 100.0,
_region_num_tenured, _region_num_tenured,
(double) _region_num_tenured / (double) all_region_num * 100.0); (double) _region_num_tenured / (double) all_region_num * 100.0);
if (!G1RSBarrierUseQueue) {
gclog_or_tty->print_cr("Of %d times conc refinement was enabled, %d (%7.2f%%) "
"did zero traversals.",
_conc_refine_enabled, _conc_refine_zero_traversals,
_conc_refine_enabled > 0 ?
100.0 * (float)_conc_refine_zero_traversals/
(float)_conc_refine_enabled : 0.0);
gclog_or_tty->print_cr(" Max # of traversals = %d.",
_conc_refine_max_traversals);
gclog_or_tty->print_cr("");
}
} }
if (TraceGen1Time) { if (TraceGen1Time) {
if (_all_full_gc_times_ms->num() > 0) { if (_all_full_gc_times_ms->num() > 0) {
...@@ -2500,38 +2481,6 @@ void G1CollectorPolicy::print_yg_surv_rate_info() const { ...@@ -2500,38 +2481,6 @@ void G1CollectorPolicy::print_yg_surv_rate_info() const {
#endif // PRODUCT #endif // PRODUCT
} }
// Disables concurrent refinement, records how many traversals it performed
// since it was enabled, and adapts _conc_refine_current_delta (in heap
// regions) accordingly:
//   - no traversals at all          -> delta is multiplied by 4.0
//   - more traversals than target   -> delta is multiplied by 1/1.5
//   - fewer traversals than target  -> delta is multiplied by 1.5
//   - exactly the target            -> delta is unchanged
// The new delta is clamped to the range [1, number of heap regions].
// Also maintains the zero-traversal, max-traversal and enabled counters.
//
// The trace output prints the unsigned traversal count and the size_t delta
// with %u (the delta is cast to unsigned; it is bounded by the region count)
// instead of %d, so the format specifiers match the argument types.
void G1CollectorPolicy::update_conc_refine_data() {
  unsigned traversals = _g1->concurrent_g1_refine()->disable();
  if (traversals == 0) _conc_refine_zero_traversals++;
  _conc_refine_max_traversals = MAX2(_conc_refine_max_traversals,
                                     (size_t)traversals);
  if (G1PolicyVerbose > 1)
    gclog_or_tty->print_cr("Did a CR traversal series: %u traversals.", traversals);

  // Pick the scaling factor for the delta from the traversal count.
  double multiplier = 1.0;
  if (traversals == 0) {
    multiplier = 4.0;
  } else if (traversals > (size_t)G1ConcRefineTargTraversals) {
    multiplier = 1.0/1.5;
  } else if (traversals < (size_t)G1ConcRefineTargTraversals) {
    multiplier = 1.5;
  }
  if (G1PolicyVerbose > 1) {
    gclog_or_tty->print_cr(" Multiplier = %7.2f.", multiplier);
    gclog_or_tty->print(" Delta went from %u regions to ",
                        (unsigned)_conc_refine_current_delta);
  }

  // Scale, then clamp to at most the number of regions and at least 1.
  _conc_refine_current_delta =
    MIN2(_g1->n_regions(),
         (size_t)(_conc_refine_current_delta * multiplier));
  _conc_refine_current_delta =
    MAX2(_conc_refine_current_delta, (size_t)1);
  if (G1PolicyVerbose > 1) {
    gclog_or_tty->print_cr("%u regions.", (unsigned)_conc_refine_current_delta);
  }
  _conc_refine_enabled++;
}
bool bool
G1CollectorPolicy::should_add_next_region_to_young_list() { G1CollectorPolicy::should_add_next_region_to_young_list() {
assert(in_young_gc_mode(), "should be in young GC mode"); assert(in_young_gc_mode(), "should be in young GC mode");
......
...@@ -637,18 +637,6 @@ protected: ...@@ -637,18 +637,6 @@ protected:
// The number of collection pauses at the end of the last mark. // The number of collection pauses at the end of the last mark.
size_t _n_pauses_at_mark_end; size_t _n_pauses_at_mark_end;
// ==== This section is for stats related to starting Conc Refinement on time.
size_t _conc_refine_enabled;
size_t _conc_refine_zero_traversals;
size_t _conc_refine_max_traversals;
// In # of heap regions.
size_t _conc_refine_current_delta;
// At the beginning of a collection pause, update the variables above,
// especially the "delta".
void update_conc_refine_data();
// ====
// Stash a pointer to the g1 heap. // Stash a pointer to the g1 heap.
G1CollectedHeap* _g1; G1CollectedHeap* _g1;
......
...@@ -105,28 +105,6 @@ StupidG1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, ...@@ -105,28 +105,6 @@ StupidG1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
_g1->heap_region_iterate(&rc); _g1->heap_region_iterate(&rc);
} }
// Region closure used to update remembered sets by walking regions.
// For every region that is neither in the collection set nor a
// "continues humongous" region, it points the oop closure at that region,
// selects the out-of-region filter, and asks the barrier set to iterate
// over the region with that closure.
class UpdateRSOutOfRegionClosure: public HeapRegionClosure {
  G1CollectedHeap*   _g1h;
  ModRefBarrierSet*  _mr_bs;
  UpdateRSOopClosure _cl;
  int                _worker_i;

public:
  // The initializers are listed in member declaration order, which is the
  // order C++ runs them in regardless of how the list is written.
  UpdateRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) :
    _g1h(g1),
    _mr_bs(g1->mr_bs()),
    _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i),
    _worker_i(worker_i)
  {}

  // Always returns false, whether or not the region was processed.
  bool doHeapRegion(HeapRegion* r) {
    // Skip collection-set regions and humongous continuation regions.
    if (r->in_collection_set() || r->continuesHumongous()) {
      return false;
    }
    _cl.set_from(r);
    r->set_next_filter_kind(HeapRegionDCTOC::OutOfRegionFilterKind);
    _mr_bs->mod_oop_in_space_iterate(r, &_cl, true, true);
    return false;
  }
};
class VerifyRSCleanCardOopClosure: public OopClosure { class VerifyRSCleanCardOopClosure: public OopClosure {
G1CollectedHeap* _g1; G1CollectedHeap* _g1;
public: public:
...@@ -350,30 +328,17 @@ void HRInto_G1RemSet::updateRS(int worker_i) { ...@@ -350,30 +328,17 @@ void HRInto_G1RemSet::updateRS(int worker_i) {
double start = os::elapsedTime(); double start = os::elapsedTime();
_g1p->record_update_rs_start_time(worker_i, start * 1000.0); _g1p->record_update_rs_start_time(worker_i, start * 1000.0);
if (G1RSBarrierUseQueue && !cg1r->do_traversal()) { // Apply the appropriate closure to all remaining log entries.
// Apply the appropriate closure to all remaining log entries. _g1->iterate_dirty_card_closure(false, worker_i);
_g1->iterate_dirty_card_closure(false, worker_i); // Now there should be no dirty cards.
// Now there should be no dirty cards. if (G1RSLogCheckCardTable) {
if (G1RSLogCheckCardTable) { CountNonCleanMemRegionClosure cl(_g1);
CountNonCleanMemRegionClosure cl(_g1); _ct_bs->mod_card_iterate(&cl);
_ct_bs->mod_card_iterate(&cl); // XXX This isn't true any more: keeping cards of young regions
// XXX This isn't true any more: keeping cards of young regions // marked dirty broke it. Need some reasonable fix.
// marked dirty broke it. Need some reasonable fix. guarantee(cl.n() == 0, "Card table should be clean.");
guarantee(cl.n() == 0, "Card table should be clean.");
}
} else {
UpdateRSOutOfRegionClosure update_rs(_g1, worker_i);
_g1->heap_region_iterate(&update_rs);
// We did a traversal; no further one is necessary.
if (G1RSBarrierUseQueue) {
assert(cg1r->do_traversal(), "Or we shouldn't have gotten here.");
cg1r->set_pya_cancel();
}
if (_cg1r->use_cache()) {
_cg1r->clear_and_record_card_counts();
_cg1r->clear_hot_cache();
}
} }
_g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0); _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
} }
...@@ -486,11 +451,6 @@ HRInto_G1RemSet::scanNewRefsRS(OopsInHeapRegionClosure* oc, ...@@ -486,11 +451,6 @@ HRInto_G1RemSet::scanNewRefsRS(OopsInHeapRegionClosure* oc,
* 1000.0); * 1000.0);
} }
// Sets this object's "parallel traversal in progress" flag to "b" and then
// forwards the same value to HeapRegionRemSet::set_par_traversal().
void HRInto_G1RemSet::set_par_traversal(bool b) {
_par_traversal_in_progress = b;
HeapRegionRemSet::set_par_traversal(b);
}
void HRInto_G1RemSet::cleanupHRRS() { void HRInto_G1RemSet::cleanupHRRS() {
HeapRegionRemSet::cleanup(); HeapRegionRemSet::cleanup();
} }
...@@ -527,7 +487,7 @@ HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, ...@@ -527,7 +487,7 @@ HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
updateRS(worker_i); updateRS(worker_i);
scanNewRefsRS(oc, worker_i); scanNewRefsRS(oc, worker_i);
} else { } else {
_g1p->record_update_rs_start_time(worker_i, os::elapsedTime()); _g1p->record_update_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
_g1p->record_update_rs_processed_buffers(worker_i, 0.0); _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
_g1p->record_update_rs_time(worker_i, 0.0); _g1p->record_update_rs_time(worker_i, 0.0);
_g1p->record_scan_new_refs_time(worker_i, 0.0); _g1p->record_scan_new_refs_time(worker_i, 0.0);
...@@ -535,7 +495,7 @@ HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, ...@@ -535,7 +495,7 @@ HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
if (G1ParallelRSetScanningEnabled || (worker_i == 0)) { if (G1ParallelRSetScanningEnabled || (worker_i == 0)) {
scanRS(oc, worker_i); scanRS(oc, worker_i);
} else { } else {
_g1p->record_scan_rs_start_time(worker_i, os::elapsedTime()); _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
_g1p->record_scan_rs_time(worker_i, 0.0); _g1p->record_scan_rs_time(worker_i, 0.0);
} }
} else { } else {
...@@ -562,11 +522,6 @@ prepare_for_oops_into_collection_set_do() { ...@@ -562,11 +522,6 @@ prepare_for_oops_into_collection_set_do() {
if (ParallelGCThreads > 0) { if (ParallelGCThreads > 0) {
set_par_traversal(true); set_par_traversal(true);
_seq_task->set_par_threads((int)n_workers()); _seq_task->set_par_threads((int)n_workers());
if (cg1r->do_traversal()) {
updateRS(0);
// Have to do this again after updaters
cleanupHRRS();
}
} }
guarantee( _cards_scanned == NULL, "invariant" ); guarantee( _cards_scanned == NULL, "invariant" );
_cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
...@@ -647,11 +602,8 @@ void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { ...@@ -647,11 +602,8 @@ void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() {
_g1->collection_set_iterate(&iterClosure); _g1->collection_set_iterate(&iterClosure);
// Set all cards back to clean. // Set all cards back to clean.
_g1->cleanUpCardTable(); _g1->cleanUpCardTable();
if (ParallelGCThreads > 0) { if (ParallelGCThreads > 0) {
ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
if (cg1r->do_traversal()) {
cg1r->cg1rThread()->set_do_traversal(false);
}
set_par_traversal(false); set_par_traversal(false);
} }
...@@ -721,139 +673,8 @@ void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm, ...@@ -721,139 +673,8 @@ void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
} }
// Region closure that performs one traversal-based refinement step per
// heap region. For each region that is not in the collection set, is not a
// "continues humongous" region and is not young, it repeatedly pulls the
// next run of dirty cards out of the card table (resetting them to clean),
// iterates over the objects in that run with an RS-updating closure, and
// counts the cards it managed to process. Between runs, and after each
// region, it offers to yield if the refinement thread asks for it.
class ConcRefineRegionClosure: public HeapRegionClosure {
// Set in the constructor; not referenced by the code visible in this class.
G1CollectedHeap* _g1h;
CardTableModRefBS* _ctbs;
// Thread consulted for should_yield()/yield(); may be NULL, in which case
// no yielding is attempted.
ConcurrentGCThread* _cgc_thrd;
ConcurrentG1Refine* _cg1r;
// Running total of cards fully processed by this closure.
unsigned _cards_processed;
UpdateRSOopClosure _update_rs_oop_cl;
public:
// NOTE: the initializer list below is not in member declaration order.
// C++ initializes members in declaration order; this is harmless here
// because every initializer depends only on constructor arguments or on
// G1CollectedHeap::heap(), never on another member.
ConcRefineRegionClosure(CardTableModRefBS* ctbs,
ConcurrentG1Refine* cg1r,
HRInto_G1RemSet* g1rs) :
_ctbs(ctbs), _cg1r(cg1r), _cgc_thrd(cg1r->cg1rThread()),
_update_rs_oop_cl(g1rs), _cards_processed(0),
_g1h(G1CollectedHeap::heap())
{}
// Processes the dirty cards of region "r". Returns true only when, after a
// yield, get_pya() reports PYA_restart or PYA_cancel; otherwise returns
// false. (Presumably a true return stops the enclosing region iteration --
// confirm against HeapRegionClosure.)
bool doHeapRegion(HeapRegion* r) {
if (!r->in_collection_set() &&
!r->continuesHumongous() &&
!r->is_young()) {
_update_rs_oop_cl.set_from(r);
UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl);
// For each run of dirty card in the region:
// 1) Clear the cards.
// 2) Process the range corresponding to the run, adding any
// necessary RS entries.
// 1 must precede 2, so that a concurrent modification redirties the
// card. If a processing attempt does not succeed, because it runs
// into an unparseable region, we will do binary search to find the
// beginning of the next parseable region.
HeapWord* startAddr = r->bottom();
HeapWord* endAddr = r->used_region().end();
// lastAddr is where the next search for dirty cards resumes; the loop
// increment copies it into nextAddr.
HeapWord* lastAddr;
HeapWord* nextAddr;
for (nextAddr = lastAddr = startAddr;
nextAddr < endAddr;
nextAddr = lastAddr) {
MemRegion dirtyRegion;
// Get and clear dirty region from card table
MemRegion next_mr(nextAddr, endAddr);
dirtyRegion =
_ctbs->dirty_card_range_after_reset(
next_mr,
true, CardTableModRefBS::clean_card_val());
assert(dirtyRegion.start() >= nextAddr,
"returned region inconsistent?");
if (!dirtyRegion.is_empty()) {
HeapWord* stop_point =
r->object_iterate_mem_careful(dirtyRegion,
&update_rs_obj_cl);
// A NULL stop_point is treated as "the whole dirty run was processed".
if (stop_point == NULL) {
lastAddr = dirtyRegion.end();
_cards_processed +=
(int) (dirtyRegion.word_size() / CardTableModRefBS::card_size_in_words);
} else {
// We're going to skip one or more cards that we can't parse.
HeapWord* next_parseable_card =
r->next_block_start_careful(stop_point);
// Round this up to a card boundary.
next_parseable_card =
_ctbs->addr_for(_ctbs->byte_after_const(next_parseable_card));
// Now we invalidate the intervening cards so we'll see them
// again.
MemRegion remaining_dirty =
MemRegion(stop_point, dirtyRegion.end());
MemRegion skipped =
MemRegion(stop_point, next_parseable_card);
// Only the part of the skipped range that lies inside the run we just
// cleared is re-dirtied.
_ctbs->invalidate(skipped.intersection(remaining_dirty));
// Now start up again where we can parse.
lastAddr = next_parseable_card;
// Count how many we did completely.
_cards_processed +=
(stop_point - dirtyRegion.start()) /
CardTableModRefBS::card_size_in_words;
}
// Allow interruption at regular intervals.
// (Might need to make them more regular, if we get big
// dirty regions.)
if (_cgc_thrd != NULL) {
if (_cgc_thrd->should_yield()) {
_cgc_thrd->yield();
switch (_cg1r->get_pya()) {
case PYA_continue:
// This may have changed: re-read.
endAddr = r->used_region().end();
// "continue" applies to the enclosing for loop: its increment
// (nextAddr = lastAddr) still runs before the next iteration.
continue;
case PYA_restart: case PYA_cancel:
return true;
}
// Any other value falls out of the switch and the loop simply proceeds.
}
}
} else {
// No dirty cards remain in [nextAddr, endAddr): done with this region.
break;
}
}
}
// A good yield opportunity.
if (_cgc_thrd != NULL) {
if (_cgc_thrd->should_yield()) {
_cgc_thrd->yield();
switch (_cg1r->get_pya()) {
case PYA_restart: case PYA_cancel:
return true;
default:
break;
}
}
}
return false;
}
// Number of cards counted as processed so far by this closure.
unsigned cards_processed() { return _cards_processed; }
};
// Runs one traversal-based refinement pass: applies a
// ConcRefineRegionClosure to the heap regions, then bumps the traversal
// counter and adds the number of cards that closure processed to the
// running card total.
void HRInto_G1RemSet::concurrentRefinementPass(ConcurrentG1Refine* cg1r) {
  ConcRefineRegionClosure region_refiner(ct_bs(), cg1r, this);
  _g1->heap_region_iterate(&region_refiner);

  // Bookkeeping for the summary statistics.
  _conc_refine_traversals += 1;
  _conc_refine_cards = _conc_refine_cards + region_refiner.cards_processed();
}
static IntHistogram out_of_histo(50, 50); static IntHistogram out_of_histo(50, 50);
void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
// If the card is no longer dirty, nothing to do. // If the card is no longer dirty, nothing to do.
if (*card_ptr != CardTableModRefBS::dirty_card_val()) return; if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;
...@@ -983,10 +804,16 @@ public: ...@@ -983,10 +804,16 @@ public:
HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; } HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
}; };
class PrintRSThreadVTimeClosure : public ThreadClosure {
public:
virtual void do_thread(Thread *t) {
ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
gclog_or_tty->print(" %5.2f", crt->vtime_accum());
}
};
void HRInto_G1RemSet::print_summary_info() { void HRInto_G1RemSet::print_summary_info() {
G1CollectedHeap* g1 = G1CollectedHeap::heap(); G1CollectedHeap* g1 = G1CollectedHeap::heap();
ConcurrentG1RefineThread* cg1r_thrd =
g1->concurrent_g1_refine()->cg1rThread();
#if CARD_REPEAT_HISTO #if CARD_REPEAT_HISTO
gclog_or_tty->print_cr("\nG1 card_repeat count histogram: "); gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
...@@ -999,15 +826,13 @@ void HRInto_G1RemSet::print_summary_info() { ...@@ -999,15 +826,13 @@ void HRInto_G1RemSet::print_summary_info() {
gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number."); gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number.");
out_of_histo.print_on(gclog_or_tty); out_of_histo.print_on(gclog_or_tty);
} }
gclog_or_tty->print_cr("\n Concurrent RS processed %d cards in " gclog_or_tty->print_cr("\n Concurrent RS processed %d cards",
"%5.2fs.", _conc_refine_cards);
_conc_refine_cards, cg1r_thrd->vtime_accum());
DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
jint tot_processed_buffers = jint tot_processed_buffers =
dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread(); dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers); gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers);
gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS thread.", gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS threads.",
dcqs.processed_buffers_rs_thread(), dcqs.processed_buffers_rs_thread(),
100.0*(float)dcqs.processed_buffers_rs_thread()/ 100.0*(float)dcqs.processed_buffers_rs_thread()/
(float)tot_processed_buffers); (float)tot_processed_buffers);
...@@ -1015,15 +840,12 @@ void HRInto_G1RemSet::print_summary_info() { ...@@ -1015,15 +840,12 @@ void HRInto_G1RemSet::print_summary_info() {
dcqs.processed_buffers_mut(), dcqs.processed_buffers_mut(),
100.0*(float)dcqs.processed_buffers_mut()/ 100.0*(float)dcqs.processed_buffers_mut()/
(float)tot_processed_buffers); (float)tot_processed_buffers);
gclog_or_tty->print_cr(" Did %d concurrent refinement traversals.", gclog_or_tty->print_cr(" Conc RS threads times(s)");
_conc_refine_traversals); PrintRSThreadVTimeClosure p;
if (!G1RSBarrierUseQueue) { gclog_or_tty->print(" ");
gclog_or_tty->print_cr(" Scanned %8.2f cards/traversal.", g1->concurrent_g1_refine()->threads_do(&p);
_conc_refine_traversals > 0 ?
(float)_conc_refine_cards/(float)_conc_refine_traversals :
0);
}
gclog_or_tty->print_cr(""); gclog_or_tty->print_cr("");
if (G1UseHRIntoRS) { if (G1UseHRIntoRS) {
HRRSStatsIter blk; HRRSStatsIter blk;
g1->heap_region_iterate(&blk); g1->heap_region_iterate(&blk);
......
...@@ -33,15 +33,12 @@ class ConcurrentG1Refine; ...@@ -33,15 +33,12 @@ class ConcurrentG1Refine;
class G1RemSet: public CHeapObj { class G1RemSet: public CHeapObj {
protected: protected:
G1CollectedHeap* _g1; G1CollectedHeap* _g1;
unsigned _conc_refine_traversals;
unsigned _conc_refine_cards; unsigned _conc_refine_cards;
size_t n_workers(); size_t n_workers();
public: public:
G1RemSet(G1CollectedHeap* g1) : G1RemSet(G1CollectedHeap* g1) :
_g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0) _g1(g1), _conc_refine_cards(0)
{} {}
// Invoke "blk->do_oop" on all pointers into the CS in object in regions // Invoke "blk->do_oop" on all pointers into the CS in object in regions
...@@ -81,19 +78,11 @@ public: ...@@ -81,19 +78,11 @@ public:
virtual void scrub_par(BitMap* region_bm, BitMap* card_bm, virtual void scrub_par(BitMap* region_bm, BitMap* card_bm,
int worker_num, int claim_val) = 0; int worker_num, int claim_val) = 0;
// Do any "refinement" activity that might be appropriate to the given
// G1RemSet. If "refinement" has iterative "passes", do one pass.
// If "t" is non-NULL, it is the thread performing the refinement.
// NOTE(review): this signature has no parameter named "t"; its only
// parameter is "cg1r". The sentence above looks stale -- confirm.
// Default implementation does nothing.
virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {}
// Refine the card corresponding to "card_ptr". If "sts" is non-NULL, // Refine the card corresponding to "card_ptr". If "sts" is non-NULL,
// join and leave around parts that must be atomic wrt GC. (NULL means // join and leave around parts that must be atomic wrt GC. (NULL means
// being done at a safepoint.) // being done at a safepoint.)
virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {} virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
unsigned conc_refine_cards() { return _conc_refine_cards; }
// Print any relevant summary info. // Print any relevant summary info.
virtual void print_summary_info() {} virtual void print_summary_info() {}
...@@ -153,7 +142,7 @@ protected: ...@@ -153,7 +142,7 @@ protected:
// progress. If so, then cards added to remembered sets should also have // progress. If so, then cards added to remembered sets should also have
// their references into the collection summarized in "_new_refs". // their references into the collection summarized in "_new_refs".
bool _par_traversal_in_progress; bool _par_traversal_in_progress;
void set_par_traversal(bool b); void set_par_traversal(bool b) { _par_traversal_in_progress = b; }
GrowableArray<oop*>** _new_refs; GrowableArray<oop*>** _new_refs;
void new_refs_iterate(OopClosure* cl); void new_refs_iterate(OopClosure* cl);
...@@ -194,7 +183,6 @@ public: ...@@ -194,7 +183,6 @@ public:
void scrub_par(BitMap* region_bm, BitMap* card_bm, void scrub_par(BitMap* region_bm, BitMap* card_bm,
int worker_num, int claim_val); int worker_num, int claim_val);
virtual void concurrentRefinementPass(ConcurrentG1Refine* t);
virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i); virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i);
virtual void print_summary_info(); virtual void print_summary_info();
......
...@@ -147,9 +147,6 @@ ...@@ -147,9 +147,6 @@
develop(bool, G1PrintCTFilterStats, false, \ develop(bool, G1PrintCTFilterStats, false, \
"If true, print stats on RS filtering effectiveness") \ "If true, print stats on RS filtering effectiveness") \
\ \
develop(bool, G1RSBarrierUseQueue, true, \
"If true, use queueing RS barrier") \
\
develop(bool, G1DeferredRSUpdate, true, \ develop(bool, G1DeferredRSUpdate, true, \
"If true, use deferred RS updates") \ "If true, use deferred RS updates") \
\ \
...@@ -253,6 +250,10 @@ ...@@ -253,6 +250,10 @@
\ \
experimental(bool, G1ParallelRSetScanningEnabled, false, \ experimental(bool, G1ParallelRSetScanningEnabled, false, \
"Enables the parallelization of remembered set scanning " \ "Enables the parallelization of remembered set scanning " \
"during evacuation pauses") "during evacuation pauses") \
\
product(uintx, G1ParallelRSetThreads, 0, \
"If non-0 is the number of parallel rem set update threads, " \
"otherwise the value is determined ergonomically.")
G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG) G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
...@@ -1052,14 +1052,6 @@ bool OtherRegionsTable::contains_reference_locked(oop* from) const { ...@@ -1052,14 +1052,6 @@ bool OtherRegionsTable::contains_reference_locked(oop* from) const {
} }
// Class-wide flag shared by all HeapRegionRemSet instances; starts out false.
bool HeapRegionRemSet::_par_traversal = false;
// Sets the class-wide flag to "b". The assert requires that "b" differs
// from the current value, i.e. calls must alternate between true and false.
void HeapRegionRemSet::set_par_traversal(bool b) {
assert(_par_traversal != b, "Proper alternation...");
_par_traversal = b;
}
int HeapRegionRemSet::num_par_rem_sets() { int HeapRegionRemSet::num_par_rem_sets() {
// We always have at least two, so that a mutator thread can claim an // We always have at least two, so that a mutator thread can claim an
// id and add to a rem set. // id and add to a rem set.
......
...@@ -177,8 +177,6 @@ private: ...@@ -177,8 +177,6 @@ private:
G1BlockOffsetSharedArray* _bosa; G1BlockOffsetSharedArray* _bosa;
G1BlockOffsetSharedArray* bosa() const { return _bosa; } G1BlockOffsetSharedArray* bosa() const { return _bosa; }
static bool _par_traversal;
OtherRegionsTable _other_regions; OtherRegionsTable _other_regions;
// One set bit for every region that has an entry for this one. // One set bit for every region that has an entry for this one.
...@@ -211,8 +209,6 @@ public: ...@@ -211,8 +209,6 @@ public:
HeapRegion* hr); HeapRegion* hr);
static int num_par_rem_sets(); static int num_par_rem_sets();
static bool par_traversal() { return _par_traversal; }
static void set_par_traversal(bool b);
HeapRegion* hr() const { HeapRegion* hr() const {
return _other_regions.hr(); return _other_regions.hr();
......
...@@ -172,7 +172,7 @@ void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_ ...@@ -172,7 +172,7 @@ void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_
_n_completed_buffers++; _n_completed_buffers++;
if (!_process_completed && if (!_process_completed &&
_n_completed_buffers == _process_completed_threshold) { _n_completed_buffers >= _process_completed_threshold) {
_process_completed = true; _process_completed = true;
if (_notify_when_complete) if (_notify_when_complete)
_cbl_mon->notify_all(); _cbl_mon->notify_all();
......
...@@ -49,6 +49,8 @@ concurrentG1Refine.cpp space.inline.hpp ...@@ -49,6 +49,8 @@ concurrentG1Refine.cpp space.inline.hpp
concurrentG1Refine.hpp globalDefinitions.hpp concurrentG1Refine.hpp globalDefinitions.hpp
concurrentG1Refine.hpp allocation.hpp concurrentG1Refine.hpp allocation.hpp
concurrentG1Refine.hpp thread.hpp
concurrentG1RefineThread.cpp concurrentG1Refine.hpp concurrentG1RefineThread.cpp concurrentG1Refine.hpp
concurrentG1RefineThread.cpp concurrentG1RefineThread.hpp concurrentG1RefineThread.cpp concurrentG1RefineThread.hpp
......
...@@ -27,13 +27,12 @@ ...@@ -27,13 +27,12 @@
# include "incls/_precompiled.incl" # include "incls/_precompiled.incl"
# include "incls/_concurrentGCThread.cpp.incl" # include "incls/_concurrentGCThread.cpp.incl"
bool ConcurrentGCThread::_should_terminate = false;
bool ConcurrentGCThread::_has_terminated = false;
int ConcurrentGCThread::_CGC_flag = CGC_nil; int ConcurrentGCThread::_CGC_flag = CGC_nil;
SuspendibleThreadSet ConcurrentGCThread::_sts; SuspendibleThreadSet ConcurrentGCThread::_sts;
ConcurrentGCThread::ConcurrentGCThread() { ConcurrentGCThread::ConcurrentGCThread() :
_should_terminate(false), _has_terminated(false) {
_sts.initialize(); _sts.initialize();
}; };
......
...@@ -72,8 +72,8 @@ class ConcurrentGCThread: public NamedThread { ...@@ -72,8 +72,8 @@ class ConcurrentGCThread: public NamedThread {
friend class VMStructs; friend class VMStructs;
protected: protected:
static bool _should_terminate; bool _should_terminate;
static bool _has_terminated; bool _has_terminated;
enum CGC_flag_type { enum CGC_flag_type {
CGC_nil = 0x0, CGC_nil = 0x0,
......
...@@ -33,12 +33,8 @@ CardTableRS::CardTableRS(MemRegion whole_heap, ...@@ -33,12 +33,8 @@ CardTableRS::CardTableRS(MemRegion whole_heap,
{ {
#ifndef SERIALGC #ifndef SERIALGC
if (UseG1GC) { if (UseG1GC) {
if (G1RSBarrierUseQueue) {
_ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap, _ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap,
max_covered_regions); max_covered_regions);
} else {
_ct_bs = new G1SATBCardTableModRefBS(whole_heap, max_covered_regions);
}
} else { } else {
_ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions); _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions);
} }
......
...@@ -70,7 +70,6 @@ Monitor* FullGCCount_lock = NULL; ...@@ -70,7 +70,6 @@ Monitor* FullGCCount_lock = NULL;
Monitor* CMark_lock = NULL; Monitor* CMark_lock = NULL;
Monitor* ZF_mon = NULL; Monitor* ZF_mon = NULL;
Monitor* Cleanup_mon = NULL; Monitor* Cleanup_mon = NULL;
Monitor* G1ConcRefine_mon = NULL;
Mutex* SATB_Q_FL_lock = NULL; Mutex* SATB_Q_FL_lock = NULL;
Monitor* SATB_Q_CBL_mon = NULL; Monitor* SATB_Q_CBL_mon = NULL;
Mutex* Shared_SATB_Q_lock = NULL; Mutex* Shared_SATB_Q_lock = NULL;
...@@ -168,7 +167,6 @@ void mutex_init() { ...@@ -168,7 +167,6 @@ void mutex_init() {
def(CMark_lock , Monitor, nonleaf, true ); // coordinate concurrent mark thread def(CMark_lock , Monitor, nonleaf, true ); // coordinate concurrent mark thread
def(ZF_mon , Monitor, leaf, true ); def(ZF_mon , Monitor, leaf, true );
def(Cleanup_mon , Monitor, nonleaf, true ); def(Cleanup_mon , Monitor, nonleaf, true );
def(G1ConcRefine_mon , Monitor, nonleaf, true );
def(SATB_Q_FL_lock , Mutex , special, true ); def(SATB_Q_FL_lock , Mutex , special, true );
def(SATB_Q_CBL_mon , Monitor, nonleaf, true ); def(SATB_Q_CBL_mon , Monitor, nonleaf, true );
def(Shared_SATB_Q_lock , Mutex, nonleaf, true ); def(Shared_SATB_Q_lock , Mutex, nonleaf, true );
......
...@@ -63,9 +63,6 @@ extern Monitor* FullGCCount_lock; // in support of "concurrent" f ...@@ -63,9 +63,6 @@ extern Monitor* FullGCCount_lock; // in support of "concurrent" f
extern Monitor* CMark_lock; // used for concurrent mark thread coordination extern Monitor* CMark_lock; // used for concurrent mark thread coordination
extern Monitor* ZF_mon; // used for G1 conc zero-fill. extern Monitor* ZF_mon; // used for G1 conc zero-fill.
extern Monitor* Cleanup_mon; // used for G1 conc cleanup. extern Monitor* Cleanup_mon; // used for G1 conc cleanup.
extern Monitor* G1ConcRefine_mon; // used for G1 conc-refine
// coordination.
extern Mutex* SATB_Q_FL_lock; // Protects SATB Q extern Mutex* SATB_Q_FL_lock; // Protects SATB Q
// buffer free list. // buffer free list.
extern Monitor* SATB_Q_CBL_mon; // Protects SATB Q extern Monitor* SATB_Q_CBL_mon; // Protects SATB Q
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册