diff --git a/src/cpu/sparc/vm/assembler_sparc.cpp b/src/cpu/sparc/vm/assembler_sparc.cpp index b9d8047001fdc3eeeb90182c8350961bf3417e09..f2d58782aabe6df7195434b0293699eb01f5d4c3 100644 --- a/src/cpu/sparc/vm/assembler_sparc.cpp +++ b/src/cpu/sparc/vm/assembler_sparc.cpp @@ -4454,43 +4454,26 @@ void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val delayed()->nop(); } - // Now we decide how to generate the card table write. If we're - // enqueueing, we call out to a generated function. Otherwise, we do it - // inline here. - - if (G1RSBarrierUseQueue) { - // If the "store_addr" register is an "in" or "local" register, move it to - // a scratch reg so we can pass it as an argument. - bool use_scr = !(store_addr->is_global() || store_addr->is_out()); - // Pick a scratch register different from "tmp". - Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); - // Make sure we use up the delay slot! - if (use_scr) { - post_filter_masm->mov(store_addr, scr); - } else { - post_filter_masm->nop(); - } - generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base); - save_frame(0); - call(dirty_card_log_enqueue); - if (use_scr) { - delayed()->mov(scr, O0); - } else { - delayed()->mov(store_addr->after_save(), O0); - } - restore(); - + // If the "store_addr" register is an "in" or "local" register, move it to + // a scratch reg so we can pass it as an argument. + bool use_scr = !(store_addr->is_global() || store_addr->is_out()); + // Pick a scratch register different from "tmp". + Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); + // Make sure we use up the delay slot! 
+ if (use_scr) { + post_filter_masm->mov(store_addr, scr); } else { - -#ifdef _LP64 - post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr); -#else - post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr); -#endif - assert(tmp != store_addr, "need separate temp reg"); - set(bs->byte_map_base, tmp); - stb(G0, tmp, store_addr); + post_filter_masm->nop(); } + generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base); + save_frame(0); + call(dirty_card_log_enqueue); + if (use_scr) { + delayed()->mov(scr, O0); + } else { + delayed()->mov(store_addr->after_save(), O0); + } + restore(); bind(filtered); diff --git a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp index 6f12a39621680bd7c77cc48371b96ee0497ce3d7..2e58f40ad53237c968f49ec5345da65d49f07145 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp @@ -25,23 +25,26 @@ #include "incls/_precompiled.incl" #include "incls/_concurrentG1Refine.cpp.incl" -bool ConcurrentG1Refine::_enabled = false; - ConcurrentG1Refine::ConcurrentG1Refine() : - _pya(PYA_continue), _last_pya(PYA_continue), - _last_cards_during(), _first_traversal(false), _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL), _hot_cache(NULL), _def_use_cache(false), _use_cache(false), - _n_periods(0), _total_cards(0), _total_travs(0) + _n_periods(0), _total_cards(0), _total_travs(0), + _threads(NULL), _n_threads(0) { if (G1ConcRefine) { - _cg1rThread = new ConcurrentG1RefineThread(this); - assert(cg1rThread() != NULL, "Conc refine should have been created"); - assert(cg1rThread()->cg1r() == this, - "Conc refine thread should refer to this"); - } else { - _cg1rThread = NULL; + _n_threads = (G1ParallelRSetThreads > 0) ? 
G1ParallelRSetThreads : ParallelGCThreads; + if (_n_threads > 0) { + _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads); + ConcurrentG1RefineThread *next = NULL; + for (int i = _n_threads - 1; i >= 0; i--) { + ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, i); + assert(t != NULL, "Conc refine should have been created"); + assert(t->cg1r() == this, "Conc refine thread should refer to this"); + _threads[i] = t; + next = t; + } + } } } @@ -75,6 +78,14 @@ void ConcurrentG1Refine::init() { } } +void ConcurrentG1Refine::stop() { + if (_threads != NULL) { + for (int i = 0; i < _n_threads; i++) { + _threads[i]->stop(); + } + } +} + ConcurrentG1Refine::~ConcurrentG1Refine() { if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { assert(_card_counts != NULL, "Logic"); @@ -88,104 +99,22 @@ ConcurrentG1Refine::~ConcurrentG1Refine() { assert(_hot_cache != NULL, "Logic"); FREE_C_HEAP_ARRAY(jbyte*, _hot_cache); } -} - -bool ConcurrentG1Refine::refine() { - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards(); - clear_hot_cache(); // Any previous values in this are now invalid. - g1h->g1_rem_set()->concurrentRefinementPass(this); - _traversals++; - unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards(); - unsigned cards_during = cards_after-cards_before; - // If this is the first traversal in the current enabling - // and we did some cards, or if the number of cards found is decreasing - // sufficiently quickly, then keep going. Otherwise, sleep a while. 
- bool res = - (_first_traversal && cards_during > 0) - || - (!_first_traversal && cards_during * 3 < _last_cards_during * 2); - _last_cards_during = cards_during; - _first_traversal = false; - return res; -} - -void ConcurrentG1Refine::enable() { - MutexLocker x(G1ConcRefine_mon); - if (!_enabled) { - _enabled = true; - _first_traversal = true; _last_cards_during = 0; - G1ConcRefine_mon->notify_all(); - } -} - -unsigned ConcurrentG1Refine::disable() { - MutexLocker x(G1ConcRefine_mon); - if (_enabled) { - _enabled = false; - return _traversals; - } else { - return 0; - } -} - -void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() { - G1ConcRefine_mon->lock(); - while (!_enabled) { - G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag); - } - G1ConcRefine_mon->unlock(); - _traversals = 0; -}; - -void ConcurrentG1Refine::set_pya_restart() { - // If we're using the log-based RS barrier, the above will cause - // in-progress traversals of completed log buffers to quit early; we will - // also abandon all other buffers. - if (G1RSBarrierUseQueue) { - DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); - dcqs.abandon_logs(); - // Reset the post-yield actions. 
- _pya = PYA_continue; - _last_pya = PYA_continue; - } else { - _pya = PYA_restart; + if (_threads != NULL) { + for (int i = 0; i < _n_threads; i++) { + delete _threads[i]; + } + FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads); } } -void ConcurrentG1Refine::set_pya_cancel() { - _pya = PYA_cancel; -} - -PostYieldAction ConcurrentG1Refine::get_pya() { - if (_pya != PYA_continue) { - jint val = _pya; - while (true) { - jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val); - if (val_read == val) { - PostYieldAction res = (PostYieldAction)val; - assert(res != PYA_continue, "Only the refine thread should reset."); - _last_pya = res; - return res; - } else { - val = val_read; - } +void ConcurrentG1Refine::threads_do(ThreadClosure *tc) { + if (_threads != NULL) { + for (int i = 0; i < _n_threads; i++) { + tc->do_thread(_threads[i]); } } - // QQQ WELL WHAT DO WE RETURN HERE??? - // make up something! - return PYA_continue; } -PostYieldAction ConcurrentG1Refine::get_last_pya() { - PostYieldAction res = _last_pya; - _last_pya = PYA_continue; - return res; -} - -bool ConcurrentG1Refine::do_traversal() { - return _cg1rThread->do_traversal(); -} int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) { size_t card_num = (card_ptr - _ct_bot); diff --git a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp index a2717150206549c61e5965d8fec576242b584bc2..badd81fb300c295e5d223e0d153b76882c14a803 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp @@ -26,25 +26,9 @@ class ConcurrentG1RefineThread; class G1RemSet; -// What to do after a yield: -enum PostYieldAction { - PYA_continue, // Continue the traversal - PYA_restart, // Restart - PYA_cancel // It's been completed by somebody else: cancel.
-}; - class ConcurrentG1Refine: public CHeapObj { - ConcurrentG1RefineThread* _cg1rThread; - - volatile jint _pya; - PostYieldAction _last_pya; - - static bool _enabled; // Protected by G1ConcRefine_mon. - unsigned _traversals; - - // Number of cards processed during last refinement traversal. - unsigned _first_traversal; - unsigned _last_cards_during; + ConcurrentG1RefineThread** _threads; + int _n_threads; // The cache for card refinement. bool _use_cache; @@ -74,37 +58,10 @@ class ConcurrentG1Refine: public CHeapObj { ~ConcurrentG1Refine(); void init(); // Accomplish some initialization that has to wait. + void stop(); - // Enabled Conc refinement, waking up thread if necessary. - void enable(); - - // Returns the number of traversals performed since this refiner was enabled. - unsigned disable(); - - // Requires G1ConcRefine_mon to be held. - bool enabled() { return _enabled; } - - // Returns only when G1 concurrent refinement has been enabled. - void wait_for_ConcurrentG1Refine_enabled(); - - // Do one concurrent refinement pass over the card table. Returns "true" - // if heuristics determine that another pass should be done immediately. - bool refine(); - - // Indicate that an in-progress refinement pass should start over. - void set_pya_restart(); - // Indicate that an in-progress refinement pass should quit. - void set_pya_cancel(); - - // Get the appropriate post-yield action. Also sets last_pya. - PostYieldAction get_pya(); - - // The last PYA read by "get_pya". - PostYieldAction get_last_pya(); - - bool do_traversal(); - - ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; } + // Iterate over the conc refine threads + void threads_do(ThreadClosure *tc); // If this is the first entry for the slot, writes into the cache and // returns NULL. If it causes an eviction, returns the evicted pointer. 
diff --git a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp index a5e0f39577967d47324f6904b0feb75d39a1fb1d..428a535f68143546b7b42ee85dfed7b5b60ef675 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp @@ -30,12 +30,12 @@ // The CM thread is created when the G1 garbage collector is used ConcurrentG1RefineThread:: -ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) : +ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next, int worker_id) : ConcurrentGCThread(), + _worker_id(worker_id), + _active(false), + _next(next), _cg1r(cg1r), - _started(false), - _in_progress(false), - _do_traversal(false), _vtime_accum(0.0), _co_tracker(G1CRGroup), _interval_ms(5.0) @@ -43,66 +43,69 @@ ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) : create_and_start(); } -const long timeout = 200; // ms. +void ConcurrentG1RefineThread::sample_young_list_rs_lengths() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1CollectorPolicy* g1p = g1h->g1_policy(); + if (g1p->adaptive_young_list_length()) { + int regions_visited = 0; -void ConcurrentG1RefineThread::traversalBasedRefinement() { - _cg1r->wait_for_ConcurrentG1Refine_enabled(); - MutexLocker x(G1ConcRefine_mon); - while (_cg1r->enabled()) { - MutexUnlocker ux(G1ConcRefine_mon); - ResourceMark rm; - HandleMark hm; + g1h->young_list_rs_length_sampling_init(); + while (g1h->young_list_rs_length_sampling_more()) { + g1h->young_list_rs_length_sampling_next(); + ++regions_visited; - if (G1TraceConcurrentRefinement) { - gclog_or_tty->print_cr("G1-Refine starting pass"); - } - _sts.join(); - bool no_sleep = _cg1r->refine(); - _sts.leave(); - if (!no_sleep) { - MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); - // We do this only for the timeout; we don't expect this to be signalled. 
- CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout); + // we try to yield every time we visit 10 regions + if (regions_visited == 10) { + if (_sts.should_yield()) { + _sts.yield("G1 refine"); + // we just abandon the iteration + break; + } + regions_visited = 0; + } } + + g1p->check_prediction_validity(); } } -void ConcurrentG1RefineThread::queueBasedRefinement() { - DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); - // Wait for completed log buffers to exist. - { - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); - while (!_do_traversal && !dcqs.process_completed_buffers() && - !_should_terminate) { - DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag); - } - } +void ConcurrentG1RefineThread::run() { + initialize_in_thread(); + _vtime_start = os::elapsedVTime(); + wait_for_universe_init(); - if (_should_terminate) { - return; - } + _co_tracker.enable(); + _co_tracker.start(); - // Now we take them off (this doesn't hold locks while it applies - // closures.) (If we did a full collection, then we'll do a full - // traversal. - _sts.join(); - if (_do_traversal) { - (void)_cg1r->refine(); - switch (_cg1r->get_last_pya()) { - case PYA_cancel: case PYA_continue: - // Continue was caught and handled inside "refine". If it's still - // "continue" when we get here, we're done. - _do_traversal = false; - break; - case PYA_restart: - assert(_do_traversal, "Because of Full GC."); - break; + while (!_should_terminate) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + // Wait for completed log buffers to exist. 
+ { + MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); + while (((_worker_id == 0 && !dcqs.process_completed_buffers()) || + (_worker_id > 0 && !is_active())) && + !_should_terminate) { + DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag); + } + } + + if (_should_terminate) { + return; } - } else { + + // Now we take them off (this doesn't hold locks while it applies + // closures.) (If we did a full collection, then we'll do a full + // traversal. + _sts.join(); int n_logs = 0; int lower_limit = 0; double start_vtime_sec; // only used when G1SmoothConcRefine is on int prev_buffer_num; // only used when G1SmoothConcRefine is on + // This thread activation threshold + int threshold = DCQBarrierProcessCompletedThreshold * _worker_id; + // Next thread activation threshold + int next_threshold = threshold + DCQBarrierProcessCompletedThreshold; + int deactivation_threshold = MAX2(threshold - DCQBarrierProcessCompletedThreshold / 2, 0); if (G1SmoothConcRefine) { lower_limit = 0; @@ -111,29 +114,49 @@ void ConcurrentG1RefineThread::queueBasedRefinement() { } else { lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now. 
} - while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) { + while (dcqs.apply_closure_to_completed_buffer(_worker_id, lower_limit)) { double end_vtime_sec; double elapsed_vtime_sec; int elapsed_vtime_ms; - int curr_buffer_num; + int curr_buffer_num = (int) dcqs.completed_buffers_num(); if (G1SmoothConcRefine) { end_vtime_sec = os::elapsedVTime(); elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0); - curr_buffer_num = (int) dcqs.completed_buffers_num(); if (curr_buffer_num > prev_buffer_num || - curr_buffer_num > DCQBarrierProcessCompletedThreshold) { + curr_buffer_num > next_threshold) { decreaseInterval(elapsed_vtime_ms); } else if (curr_buffer_num < prev_buffer_num) { increaseInterval(elapsed_vtime_ms); } } - - sample_young_list_rs_lengths(); + if (_worker_id == 0) { + sample_young_list_rs_lengths(); + } else if (curr_buffer_num < deactivation_threshold) { + // If the number of the buffer has fallen below our threshold + // we should deactivate. The predecessor will reactivate this + // thread should the number of the buffers cross the threshold again. + MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); + deactivate(); + if (G1TraceConcurrentRefinement) { + gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id); + } + break; + } _co_tracker.update(false); + // Check if we need to activate the next thread. + if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) { + MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); + _next->activate(); + DirtyCardQ_CBL_mon->notify_all(); + if (G1TraceConcurrentRefinement) { + gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id); + } + } + if (G1SmoothConcRefine) { prev_buffer_num = curr_buffer_num; _sts.leave(); @@ -143,56 +166,9 @@ void ConcurrentG1RefineThread::queueBasedRefinement() { } n_logs++; } - // Make sure we harvest the PYA, if any. 
- (void)_cg1r->get_pya(); - } - _sts.leave(); -} - -void ConcurrentG1RefineThread::sample_young_list_rs_lengths() { - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - G1CollectorPolicy* g1p = g1h->g1_policy(); - if (g1p->adaptive_young_list_length()) { - int regions_visited = 0; - - g1h->young_list_rs_length_sampling_init(); - while (g1h->young_list_rs_length_sampling_more()) { - g1h->young_list_rs_length_sampling_next(); - ++regions_visited; - - // we try to yield every time we visit 10 regions - if (regions_visited == 10) { - if (_sts.should_yield()) { - _sts.yield("G1 refine"); - // we just abandon the iteration - break; - } - regions_visited = 0; - } - } - - g1p->check_prediction_validity(); - } -} - -void ConcurrentG1RefineThread::run() { - initialize_in_thread(); - _vtime_start = os::elapsedVTime(); - wait_for_universe_init(); - - _co_tracker.enable(); - _co_tracker.start(); - - while (!_should_terminate) { - // wait until started is set. - if (G1RSBarrierUseQueue) { - queueBasedRefinement(); - } else { - traversalBasedRefinement(); - } - _sts.join(); - _co_tracker.update(); + _co_tracker.update(false); _sts.leave(); + if (os::supports_vtime()) { _vtime_accum = (os::elapsedVTime() - _vtime_start); } else { @@ -240,7 +216,3 @@ void ConcurrentG1RefineThread::print() { Thread::print(); gclog_or_tty->cr(); } - -void ConcurrentG1RefineThread::set_do_traversal(bool b) { - _do_traversal = b; -} diff --git a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp index 69f272c78954a62d4325832f512f5011923dd31a..75f45a202d2abf0ba246ea3fdb30d0b8b023e1c3 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp @@ -33,21 +33,26 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread { double _vtime_start; // Initial virtual time. double _vtime_accum; // Initial virtual time. 
+ int _worker_id; + // The refinement threads form a singly linked list. A predecessor activates its successor + // when the number of rset update buffers crosses a certain threshold. A successor + // deactivates itself when the number of buffers falls below that threshold. + bool _active; + ConcurrentG1RefineThread * _next; public: virtual void run(); + bool is_active() { return _active; } + void activate() { _active = true; } + void deactivate() { _active = false; } + private: ConcurrentG1Refine* _cg1r; - bool _started; - bool _in_progress; - volatile bool _restart; COTracker _co_tracker; double _interval_ms; - bool _do_traversal; - void decreaseInterval(int processing_time_ms) { double min_interval_ms = (double) processing_time_ms; _interval_ms = 0.8 * _interval_ms; @@ -63,16 +68,12 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread { void sleepBeforeNextCycle(); - void traversalBasedRefinement(); - - void queueBasedRefinement(); - // For use by G1CollectedHeap, which is a friend.
static SuspendibleThreadSet* sts() { return &_sts; } public: // Constructor - ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r); + ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next, int worker_id); // Printing void print(); @@ -82,23 +83,11 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread { ConcurrentG1Refine* cg1r() { return _cg1r; } - - void set_started() { _started = true; } - void clear_started() { _started = false; } - bool started() { return _started; } - - void set_in_progress() { _in_progress = true; } - void clear_in_progress() { _in_progress = false; } - bool in_progress() { return _in_progress; } - - void set_do_traversal(bool b); - bool do_traversal() { return _do_traversal; } - void sample_young_list_rs_lengths(); // Yield for GC void yield(); // shutdown - static void stop(); + void stop(); }; diff --git a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp index c382778c212d9153e6635c51898f0b9d274470fe..a91f319d0674ef8385fa38b3cb481e445fa70471 100644 --- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp @@ -80,5 +80,5 @@ class ConcurrentMarkThread: public ConcurrentGCThread { void yield(); // shutdown - static void stop(); + void stop(); }; diff --git a/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp b/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp index 9a483dbce86cf6724379dbfb3090eb4438610e0f..db6a7d09ddcf4e1d9abc413eb0b72c1b6ac6c1e3 100644 --- a/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp +++ b/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp @@ -73,7 +73,7 @@ class ConcurrentZFThread: public ConcurrentGCThread { // while holding the ZF_needed_mon lock. 
// shutdown - static void stop(); + void stop(); // Stats static void note_region_alloc() {_region_allocs++; } diff --git a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp index ec26c6a46ef899522bd75d76f46d32921ac690f2..373da8b9f2d75740783360d047510f41610183e5 100644 --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp @@ -234,7 +234,7 @@ bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i, nd = get_completed_buffer_lock(stop_at); } bool res = apply_closure_to_completed_buffer_helper(worker_i, nd); - if (res) _processed_buffers_rs_thread++; + if (res) Atomic::inc(&_processed_buffers_rs_thread); return res; } diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index 8f485bc540e388649d7d69934f3088a3f7f423fa..22ef3e37247e0a666d53017823fd2b780132df16 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -447,7 +447,7 @@ void YoungList::print() { } void G1CollectedHeap::stop_conc_gc_threads() { - _cg1r->cg1rThread()->stop(); + _cg1r->stop(); _czft->stop(); _cmThread->stop(); } @@ -1001,12 +1001,8 @@ void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs, gc_epilogue(true); - // Abandon concurrent refinement. This must happen last: in the - // dirty-card logging system, some cards may be dirty by weak-ref - // processing, and may be enqueued. But the whole card table is - // dirtied, so this should abandon those logs, and set "do_traversal" - // to true. 
- concurrent_g1_refine()->set_pya_restart(); + // Discard all rset updates + JavaThread::dirty_card_queue_set().abandon_logs(); assert(!G1DeferredRSUpdate || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any"); assert(regions_accounted_for(), "Region leakage!"); @@ -1521,12 +1517,12 @@ jint G1CollectedHeap::initialize() { SATB_Q_FL_lock, 0, Shared_SATB_Q_lock); - if (G1RSBarrierUseQueue) { - JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, - DirtyCardQ_FL_lock, - G1DirtyCardQueueMax, - Shared_DirtyCardQ_lock); - } + + JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, + DirtyCardQ_FL_lock, + G1DirtyCardQueueMax, + Shared_DirtyCardQ_lock); + if (G1DeferredRSUpdate) { dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, DirtyCardQ_FL_lock, @@ -2249,6 +2245,15 @@ void G1CollectedHeap::print_on(outputStream* st) const { _hrs->iterate(&blk); } +class PrintOnThreadsClosure : public ThreadClosure { + outputStream* _st; +public: + PrintOnThreadsClosure(outputStream* st) : _st(st) { } + virtual void do_thread(Thread *t) { + t->print_on(_st); + } +}; + void G1CollectedHeap::print_gc_threads_on(outputStream* st) const { if (ParallelGCThreads > 0) { workers()->print_worker_threads(); @@ -2256,8 +2261,9 @@ void G1CollectedHeap::print_gc_threads_on(outputStream* st) const { st->print("\"G1 concurrent mark GC Thread\" "); _cmThread->print(); st->cr(); - st->print("\"G1 concurrent refinement GC Thread\" "); - _cg1r->cg1rThread()->print_on(st); + st->print("\"G1 concurrent refinement GC Threads\" "); + PrintOnThreadsClosure p(st); + _cg1r->threads_do(&p); st->cr(); st->print("\"G1 zero-fill GC Thread\" "); _czft->print_on(st); @@ -2269,7 +2275,7 @@ void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const { workers()->threads_do(tc); } tc->do_thread(_cmThread); - tc->do_thread(_cg1r->cg1rThread()); + _cg1r->threads_do(tc); tc->do_thread(_czft); } diff --git 
a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp index c09a62ddf4949a517b446ac917ed783d70b92fdf..2833cd704f088f44d278eb061ef48d9dad5860a2 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp @@ -167,11 +167,6 @@ G1CollectorPolicy::G1CollectorPolicy() : _all_full_gc_times_ms(new NumberSeq()), - _conc_refine_enabled(0), - _conc_refine_zero_traversals(0), - _conc_refine_max_traversals(0), - _conc_refine_current_delta(G1ConcRefineInitialDelta), - // G1PausesBtwnConcMark defaults to -1 // so the hack is to do the cast QQQ FIXME _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark), @@ -1634,9 +1629,8 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) { print_stats(1, "Parallel Time", _cur_collection_par_time_ms); print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false); print_par_stats(2, "Update RS", _par_last_update_rs_times_ms); - if (G1RSBarrierUseQueue) - print_par_buffers(3, "Processed Buffers", - _par_last_update_rs_processed_buffers, true); + print_par_buffers(3, "Processed Buffers", + _par_last_update_rs_processed_buffers, true); print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms); print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms); print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms); @@ -1649,9 +1643,8 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) { print_stats(1, "Clear CT", _cur_clear_ct_time_ms); } else { print_stats(1, "Update RS", update_rs_time); - if (G1RSBarrierUseQueue) - print_stats(2, "Processed Buffers", - (int)update_rs_processed_buffers); + print_stats(2, "Processed Buffers", + (int)update_rs_processed_buffers); print_stats(1, "Ext Root Scanning", ext_root_scan_time); print_stats(1, "Mark Stack Scanning", mark_stack_scan_time); print_stats(1, "Scan-Only 
Scanning", scan_only_time); @@ -2467,18 +2460,6 @@ void G1CollectorPolicy::print_tracing_info() const { (double) _region_num_young / (double) all_region_num * 100.0, _region_num_tenured, (double) _region_num_tenured / (double) all_region_num * 100.0); - - if (!G1RSBarrierUseQueue) { - gclog_or_tty->print_cr("Of %d times conc refinement was enabled, %d (%7.2f%%) " - "did zero traversals.", - _conc_refine_enabled, _conc_refine_zero_traversals, - _conc_refine_enabled > 0 ? - 100.0 * (float)_conc_refine_zero_traversals/ - (float)_conc_refine_enabled : 0.0); - gclog_or_tty->print_cr(" Max # of traversals = %d.", - _conc_refine_max_traversals); - gclog_or_tty->print_cr(""); - } } if (TraceGen1Time) { if (_all_full_gc_times_ms->num() > 0) { @@ -2500,38 +2481,6 @@ void G1CollectorPolicy::print_yg_surv_rate_info() const { #endif // PRODUCT } -void G1CollectorPolicy::update_conc_refine_data() { - unsigned traversals = _g1->concurrent_g1_refine()->disable(); - if (traversals == 0) _conc_refine_zero_traversals++; - _conc_refine_max_traversals = MAX2(_conc_refine_max_traversals, - (size_t)traversals); - - if (G1PolicyVerbose > 1) - gclog_or_tty->print_cr("Did a CR traversal series: %d traversals.", traversals); - double multiplier = 1.0; - if (traversals == 0) { - multiplier = 4.0; - } else if (traversals > (size_t)G1ConcRefineTargTraversals) { - multiplier = 1.0/1.5; - } else if (traversals < (size_t)G1ConcRefineTargTraversals) { - multiplier = 1.5; - } - if (G1PolicyVerbose > 1) { - gclog_or_tty->print_cr(" Multiplier = %7.2f.", multiplier); - gclog_or_tty->print(" Delta went from %d regions to ", - _conc_refine_current_delta); - } - _conc_refine_current_delta = - MIN2(_g1->n_regions(), - (size_t)(_conc_refine_current_delta * multiplier)); - _conc_refine_current_delta = - MAX2(_conc_refine_current_delta, (size_t)1); - if (G1PolicyVerbose > 1) { - gclog_or_tty->print_cr("%d regions.", _conc_refine_current_delta); - } - _conc_refine_enabled++; -} - bool 
G1CollectorPolicy::should_add_next_region_to_young_list() { assert(in_young_gc_mode(), "should be in young GC mode"); diff --git a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp index 3043b7b674e00dd2ec60b1a1a7c2151378712d97..8b8927571a8f831feb6ed415bc6163739f4c6253 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp @@ -637,18 +637,6 @@ protected: // The number of collection pauses at the end of the last mark. size_t _n_pauses_at_mark_end; - // ==== This section is for stats related to starting Conc Refinement on time. - size_t _conc_refine_enabled; - size_t _conc_refine_zero_traversals; - size_t _conc_refine_max_traversals; - // In # of heap regions. - size_t _conc_refine_current_delta; - - // At the beginning of a collection pause, update the variables above, - // especially the "delta". - void update_conc_refine_data(); - // ==== - // Stash a pointer to the g1 heap. 
G1CollectedHeap* _g1; diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/src/share/vm/gc_implementation/g1/g1RemSet.cpp index 32104bc82124bad1b642d25fd832419dada2e873..b6cf7d794d14fbc0759c2ec0eb845e1f99a4b7a1 100644 --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp @@ -105,28 +105,6 @@ StupidG1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, _g1->heap_region_iterate(&rc); } -class UpdateRSOutOfRegionClosure: public HeapRegionClosure { - G1CollectedHeap* _g1h; - ModRefBarrierSet* _mr_bs; - UpdateRSOopClosure _cl; - int _worker_i; -public: - UpdateRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) : - _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i), - _mr_bs(g1->mr_bs()), - _worker_i(worker_i), - _g1h(g1) - {} - bool doHeapRegion(HeapRegion* r) { - if (!r->in_collection_set() && !r->continuesHumongous()) { - _cl.set_from(r); - r->set_next_filter_kind(HeapRegionDCTOC::OutOfRegionFilterKind); - _mr_bs->mod_oop_in_space_iterate(r, &_cl, true, true); - } - return false; - } -}; - class VerifyRSCleanCardOopClosure: public OopClosure { G1CollectedHeap* _g1; public: @@ -350,30 +328,17 @@ void HRInto_G1RemSet::updateRS(int worker_i) { double start = os::elapsedTime(); _g1p->record_update_rs_start_time(worker_i, start * 1000.0); - if (G1RSBarrierUseQueue && !cg1r->do_traversal()) { - // Apply the appropriate closure to all remaining log entries. - _g1->iterate_dirty_card_closure(false, worker_i); - // Now there should be no dirty cards. - if (G1RSLogCheckCardTable) { - CountNonCleanMemRegionClosure cl(_g1); - _ct_bs->mod_card_iterate(&cl); - // XXX This isn't true any more: keeping cards of young regions - // marked dirty broke it. Need some reasonable fix. - guarantee(cl.n() == 0, "Card table should be clean."); - } - } else { - UpdateRSOutOfRegionClosure update_rs(_g1, worker_i); - _g1->heap_region_iterate(&update_rs); - // We did a traversal; no further one is necessary. 
- if (G1RSBarrierUseQueue) { - assert(cg1r->do_traversal(), "Or we shouldn't have gotten here."); - cg1r->set_pya_cancel(); - } - if (_cg1r->use_cache()) { - _cg1r->clear_and_record_card_counts(); - _cg1r->clear_hot_cache(); - } + // Apply the appropriate closure to all remaining log entries. + _g1->iterate_dirty_card_closure(false, worker_i); + // Now there should be no dirty cards. + if (G1RSLogCheckCardTable) { + CountNonCleanMemRegionClosure cl(_g1); + _ct_bs->mod_card_iterate(&cl); + // XXX This isn't true any more: keeping cards of young regions + // marked dirty broke it. Need some reasonable fix. + guarantee(cl.n() == 0, "Card table should be clean."); } + _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0); } @@ -486,11 +451,6 @@ HRInto_G1RemSet::scanNewRefsRS(OopsInHeapRegionClosure* oc, * 1000.0); } -void HRInto_G1RemSet::set_par_traversal(bool b) { - _par_traversal_in_progress = b; - HeapRegionRemSet::set_par_traversal(b); -} - void HRInto_G1RemSet::cleanupHRRS() { HeapRegionRemSet::cleanup(); } @@ -527,7 +487,7 @@ HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, updateRS(worker_i); scanNewRefsRS(oc, worker_i); } else { - _g1p->record_update_rs_start_time(worker_i, os::elapsedTime()); + _g1p->record_update_rs_start_time(worker_i, os::elapsedTime() * 1000.0); _g1p->record_update_rs_processed_buffers(worker_i, 0.0); _g1p->record_update_rs_time(worker_i, 0.0); _g1p->record_scan_new_refs_time(worker_i, 0.0); @@ -535,7 +495,7 @@ HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, if (G1ParallelRSetScanningEnabled || (worker_i == 0)) { scanRS(oc, worker_i); } else { - _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime()); + _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime() * 1000.0); _g1p->record_scan_rs_time(worker_i, 0.0); } } else { @@ -562,11 +522,6 @@ prepare_for_oops_into_collection_set_do() { if (ParallelGCThreads > 0) { set_par_traversal(true); 
_seq_task->set_par_threads((int)n_workers()); - if (cg1r->do_traversal()) { - updateRS(0); - // Have to do this again after updaters - cleanupHRRS(); - } } guarantee( _cards_scanned == NULL, "invariant" ); _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); @@ -647,11 +602,8 @@ void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { _g1->collection_set_iterate(&iterClosure); // Set all cards back to clean. _g1->cleanUpCardTable(); + if (ParallelGCThreads > 0) { - ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); - if (cg1r->do_traversal()) { - cg1r->cg1rThread()->set_do_traversal(false); - } set_par_traversal(false); } @@ -721,139 +673,8 @@ void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm, } -class ConcRefineRegionClosure: public HeapRegionClosure { - G1CollectedHeap* _g1h; - CardTableModRefBS* _ctbs; - ConcurrentGCThread* _cgc_thrd; - ConcurrentG1Refine* _cg1r; - unsigned _cards_processed; - UpdateRSOopClosure _update_rs_oop_cl; -public: - ConcRefineRegionClosure(CardTableModRefBS* ctbs, - ConcurrentG1Refine* cg1r, - HRInto_G1RemSet* g1rs) : - _ctbs(ctbs), _cg1r(cg1r), _cgc_thrd(cg1r->cg1rThread()), - _update_rs_oop_cl(g1rs), _cards_processed(0), - _g1h(G1CollectedHeap::heap()) - {} - - bool doHeapRegion(HeapRegion* r) { - if (!r->in_collection_set() && - !r->continuesHumongous() && - !r->is_young()) { - _update_rs_oop_cl.set_from(r); - UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl); - - // For each run of dirty card in the region: - // 1) Clear the cards. - // 2) Process the range corresponding to the run, adding any - // necessary RS entries. - // 1 must precede 2, so that a concurrent modification redirties the - // card. If a processing attempt does not succeed, because it runs - // into an unparseable region, we will do binary search to find the - // beginning of the next parseable region. 
- HeapWord* startAddr = r->bottom(); - HeapWord* endAddr = r->used_region().end(); - HeapWord* lastAddr; - HeapWord* nextAddr; - - for (nextAddr = lastAddr = startAddr; - nextAddr < endAddr; - nextAddr = lastAddr) { - MemRegion dirtyRegion; - - // Get and clear dirty region from card table - MemRegion next_mr(nextAddr, endAddr); - dirtyRegion = - _ctbs->dirty_card_range_after_reset( - next_mr, - true, CardTableModRefBS::clean_card_val()); - assert(dirtyRegion.start() >= nextAddr, - "returned region inconsistent?"); - - if (!dirtyRegion.is_empty()) { - HeapWord* stop_point = - r->object_iterate_mem_careful(dirtyRegion, - &update_rs_obj_cl); - if (stop_point == NULL) { - lastAddr = dirtyRegion.end(); - _cards_processed += - (int) (dirtyRegion.word_size() / CardTableModRefBS::card_size_in_words); - } else { - // We're going to skip one or more cards that we can't parse. - HeapWord* next_parseable_card = - r->next_block_start_careful(stop_point); - // Round this up to a card boundary. - next_parseable_card = - _ctbs->addr_for(_ctbs->byte_after_const(next_parseable_card)); - // Now we invalidate the intervening cards so we'll see them - // again. - MemRegion remaining_dirty = - MemRegion(stop_point, dirtyRegion.end()); - MemRegion skipped = - MemRegion(stop_point, next_parseable_card); - _ctbs->invalidate(skipped.intersection(remaining_dirty)); - - // Now start up again where we can parse. - lastAddr = next_parseable_card; - - // Count how many we did completely. - _cards_processed += - (stop_point - dirtyRegion.start()) / - CardTableModRefBS::card_size_in_words; - } - // Allow interruption at regular intervals. - // (Might need to make them more regular, if we get big - // dirty regions.) - if (_cgc_thrd != NULL) { - if (_cgc_thrd->should_yield()) { - _cgc_thrd->yield(); - switch (_cg1r->get_pya()) { - case PYA_continue: - // This may have changed: re-read. 
- endAddr = r->used_region().end(); - continue; - case PYA_restart: case PYA_cancel: - return true; - } - } - } - } else { - break; - } - } - } - // A good yield opportunity. - if (_cgc_thrd != NULL) { - if (_cgc_thrd->should_yield()) { - _cgc_thrd->yield(); - switch (_cg1r->get_pya()) { - case PYA_restart: case PYA_cancel: - return true; - default: - break; - } - - } - } - return false; - } - - unsigned cards_processed() { return _cards_processed; } -}; - - -void HRInto_G1RemSet::concurrentRefinementPass(ConcurrentG1Refine* cg1r) { - ConcRefineRegionClosure cr_cl(ct_bs(), cg1r, this); - _g1->heap_region_iterate(&cr_cl); - _conc_refine_traversals++; - _conc_refine_cards += cr_cl.cards_processed(); -} - static IntHistogram out_of_histo(50, 50); - - void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { // If the card is no longer dirty, nothing to do. if (*card_ptr != CardTableModRefBS::dirty_card_val()) return; @@ -983,10 +804,16 @@ public: HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; } }; +class PrintRSThreadVTimeClosure : public ThreadClosure { +public: + virtual void do_thread(Thread *t) { + ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t; + gclog_or_tty->print(" %5.2f", crt->vtime_accum()); + } +}; + void HRInto_G1RemSet::print_summary_info() { G1CollectedHeap* g1 = G1CollectedHeap::heap(); - ConcurrentG1RefineThread* cg1r_thrd = - g1->concurrent_g1_refine()->cg1rThread(); #if CARD_REPEAT_HISTO gclog_or_tty->print_cr("\nG1 card_repeat count histogram: "); @@ -999,15 +826,13 @@ void HRInto_G1RemSet::print_summary_info() { gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number."); out_of_histo.print_on(gclog_or_tty); } - gclog_or_tty->print_cr("\n Concurrent RS processed %d cards in " - "%5.2fs.", - _conc_refine_cards, cg1r_thrd->vtime_accum()); - + gclog_or_tty->print_cr("\n Concurrent RS processed %d cards", + _conc_refine_cards); DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 
jint tot_processed_buffers = dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread(); gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers); - gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS thread.", + gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS threads.", dcqs.processed_buffers_rs_thread(), 100.0*(float)dcqs.processed_buffers_rs_thread()/ (float)tot_processed_buffers); @@ -1015,15 +840,12 @@ void HRInto_G1RemSet::print_summary_info() { dcqs.processed_buffers_mut(), 100.0*(float)dcqs.processed_buffers_mut()/ (float)tot_processed_buffers); - gclog_or_tty->print_cr(" Did %d concurrent refinement traversals.", - _conc_refine_traversals); - if (!G1RSBarrierUseQueue) { - gclog_or_tty->print_cr(" Scanned %8.2f cards/traversal.", - _conc_refine_traversals > 0 ? - (float)_conc_refine_cards/(float)_conc_refine_traversals : - 0); - } + gclog_or_tty->print_cr(" Conc RS threads times(s)"); + PrintRSThreadVTimeClosure p; + gclog_or_tty->print(" "); + g1->concurrent_g1_refine()->threads_do(&p); gclog_or_tty->print_cr(""); + if (G1UseHRIntoRS) { HRRSStatsIter blk; g1->heap_region_iterate(&blk); diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.hpp b/src/share/vm/gc_implementation/g1/g1RemSet.hpp index e6439141196d5c406286efe03e93b5d47fb9b4c0..4d58e0be6a1dcbf9d0fbcbf3a66ba9cb67bbbeaa 100644 --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp @@ -33,15 +33,12 @@ class ConcurrentG1Refine; class G1RemSet: public CHeapObj { protected: G1CollectedHeap* _g1; - - unsigned _conc_refine_traversals; unsigned _conc_refine_cards; - size_t n_workers(); public: G1RemSet(G1CollectedHeap* g1) : - _g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0) + _g1(g1), _conc_refine_cards(0) {} // Invoke "blk->do_oop" on all pointers into the CS in object in regions @@ -81,19 +78,11 @@ public: virtual void scrub_par(BitMap* region_bm, BitMap* card_bm, int worker_num, int claim_val) = 0; - // Do any 
"refinement" activity that might be appropriate to the given - // G1RemSet. If "refinement" has iterateive "passes", do one pass. - // If "t" is non-NULL, it is the thread performing the refinement. - // Default implementation does nothing. - virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {} - // Refine the card corresponding to "card_ptr". If "sts" is non-NULL, // join and leave around parts that must be atomic wrt GC. (NULL means // being done at a safepoint.) virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {} - unsigned conc_refine_cards() { return _conc_refine_cards; } - // Print any relevant summary info. virtual void print_summary_info() {} @@ -153,7 +142,7 @@ protected: // progress. If so, then cards added to remembered sets should also have // their references into the collection summarized in "_new_refs". bool _par_traversal_in_progress; - void set_par_traversal(bool b); + void set_par_traversal(bool b) { _par_traversal_in_progress = b; } GrowableArray** _new_refs; void new_refs_iterate(OopClosure* cl); @@ -194,7 +183,6 @@ public: void scrub_par(BitMap* region_bm, BitMap* card_bm, int worker_num, int claim_val); - virtual void concurrentRefinementPass(ConcurrentG1Refine* t); virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i); virtual void print_summary_info(); diff --git a/src/share/vm/gc_implementation/g1/g1_globals.hpp b/src/share/vm/gc_implementation/g1/g1_globals.hpp index e8f75aa13c2268f38108cf724a3219ecc908b140..5b296e204d00d2a133b57d8a27f6fda568c5a3e9 100644 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp @@ -147,9 +147,6 @@ develop(bool, G1PrintCTFilterStats, false, \ "If true, print stats on RS filtering effectiveness") \ \ - develop(bool, G1RSBarrierUseQueue, true, \ - "If true, use queueing RS barrier") \ - \ develop(bool, G1DeferredRSUpdate, true, \ "If true, use deferred RS updates") \ \ @@ -253,6 +250,10 @@ \ experimental(bool, 
G1ParallelRSetScanningEnabled, false, \ "Enables the parallelization of remembered set scanning " \ - "during evacuation pauses") + "during evacuation pauses") \ + \ + product(uintx, G1ParallelRSetThreads, 0, \ + "If non-0 is the number of parallel rem set update threads, " \ + "otherwise the value is determined ergonomically.") G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG) diff --git a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp index 92bfe7c55a2c9bf168b3150a2ca8f7f068633026..648a49c305f6e62c0b1816aef390390e66779ccb 100644 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp @@ -1052,14 +1052,6 @@ bool OtherRegionsTable::contains_reference_locked(oop* from) const { } - -bool HeapRegionRemSet::_par_traversal = false; - -void HeapRegionRemSet::set_par_traversal(bool b) { - assert(_par_traversal != b, "Proper alternation..."); - _par_traversal = b; -} - int HeapRegionRemSet::num_par_rem_sets() { // We always have at least two, so that a mutator thread can claim an // id and add to a rem set. diff --git a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp index aa8d3346fa9f736999e58c4c03f506b64ee44e08..d6309815251db35787c84a393944f1a90f7a10bc 100644 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp @@ -177,8 +177,6 @@ private: G1BlockOffsetSharedArray* _bosa; G1BlockOffsetSharedArray* bosa() const { return _bosa; } - static bool _par_traversal; - OtherRegionsTable _other_regions; // One set bit for every region that has an entry for this one. 
@@ -211,8 +209,6 @@ public: HeapRegion* hr); static int num_par_rem_sets(); - static bool par_traversal() { return _par_traversal; } - static void set_par_traversal(bool b); HeapRegion* hr() const { return _other_regions.hr(); diff --git a/src/share/vm/gc_implementation/g1/ptrQueue.cpp b/src/share/vm/gc_implementation/g1/ptrQueue.cpp index 1da8d520637af35906e169722e6ef0e11aa64750..9f9d9dd1839993db27c098cd831acbcd266743e1 100644 --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp @@ -172,7 +172,7 @@ void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_ _n_completed_buffers++; if (!_process_completed && - _n_completed_buffers == _process_completed_threshold) { + _n_completed_buffers >= _process_completed_threshold) { _process_completed = true; if (_notify_when_complete) _cbl_mon->notify_all(); diff --git a/src/share/vm/gc_implementation/includeDB_gc_g1 b/src/share/vm/gc_implementation/includeDB_gc_g1 index ab466d70e1bf1af89822167b62e73afb2f7a74de..67f3932bed3b4dae20ea43774c190eb825394edf 100644 --- a/src/share/vm/gc_implementation/includeDB_gc_g1 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1 @@ -49,6 +49,8 @@ concurrentG1Refine.cpp space.inline.hpp concurrentG1Refine.hpp globalDefinitions.hpp concurrentG1Refine.hpp allocation.hpp +concurrentG1Refine.hpp thread.hpp + concurrentG1RefineThread.cpp concurrentG1Refine.hpp concurrentG1RefineThread.cpp concurrentG1RefineThread.hpp diff --git a/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp b/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp index 8ed8b809e2a3f3ffaf2270579d58c3ada028fb65..d046323d2669974b473ac4d2762d28fb77065d11 100644 --- a/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp @@ -27,13 +27,12 @@ # include "incls/_precompiled.incl" # include "incls/_concurrentGCThread.cpp.incl" -bool ConcurrentGCThread::_should_terminate = 
false; -bool ConcurrentGCThread::_has_terminated = false; int ConcurrentGCThread::_CGC_flag = CGC_nil; SuspendibleThreadSet ConcurrentGCThread::_sts; -ConcurrentGCThread::ConcurrentGCThread() { +ConcurrentGCThread::ConcurrentGCThread() : + _should_terminate(false), _has_terminated(false) { _sts.initialize(); }; diff --git a/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp b/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp index db6cc903ddf49666b32d18ea0bbe3c32668055e1..a4ebd858d28375e5d89af11ffbcc6ca8716273ef 100644 --- a/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp @@ -72,8 +72,8 @@ class ConcurrentGCThread: public NamedThread { friend class VMStructs; protected: - static bool _should_terminate; - static bool _has_terminated; + bool _should_terminate; + bool _has_terminated; enum CGC_flag_type { CGC_nil = 0x0, diff --git a/src/share/vm/memory/cardTableRS.cpp b/src/share/vm/memory/cardTableRS.cpp index 133e3d28ded95c6286e27a41faf3c5b66b9ba990..da80ddf9725e6859796d4cb7378ba91d2e8422cf 100644 --- a/src/share/vm/memory/cardTableRS.cpp +++ b/src/share/vm/memory/cardTableRS.cpp @@ -33,12 +33,8 @@ CardTableRS::CardTableRS(MemRegion whole_heap, { #ifndef SERIALGC if (UseG1GC) { - if (G1RSBarrierUseQueue) { _ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap, max_covered_regions); - } else { - _ct_bs = new G1SATBCardTableModRefBS(whole_heap, max_covered_regions); - } } else { _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions); } diff --git a/src/share/vm/runtime/mutexLocker.cpp b/src/share/vm/runtime/mutexLocker.cpp index c10e08c039a203cd7d8c7e492d9a535610a0c245..437192d6ba337402b9799bd353e9a77631c899fa 100644 --- a/src/share/vm/runtime/mutexLocker.cpp +++ b/src/share/vm/runtime/mutexLocker.cpp @@ -70,7 +70,6 @@ Monitor* FullGCCount_lock = NULL; Monitor* CMark_lock = NULL; Monitor* ZF_mon = NULL; Monitor* Cleanup_mon = NULL; -Monitor* 
G1ConcRefine_mon = NULL; Mutex* SATB_Q_FL_lock = NULL; Monitor* SATB_Q_CBL_mon = NULL; Mutex* Shared_SATB_Q_lock = NULL; @@ -168,7 +167,6 @@ void mutex_init() { def(CMark_lock , Monitor, nonleaf, true ); // coordinate concurrent mark thread def(ZF_mon , Monitor, leaf, true ); def(Cleanup_mon , Monitor, nonleaf, true ); - def(G1ConcRefine_mon , Monitor, nonleaf, true ); def(SATB_Q_FL_lock , Mutex , special, true ); def(SATB_Q_CBL_mon , Monitor, nonleaf, true ); def(Shared_SATB_Q_lock , Mutex, nonleaf, true ); diff --git a/src/share/vm/runtime/mutexLocker.hpp b/src/share/vm/runtime/mutexLocker.hpp index e020f5a829ccf8fac18b0ffc2e2a6c9f74944090..e17150643850b174051c4a52ddc3fd62ff4f59ad 100644 --- a/src/share/vm/runtime/mutexLocker.hpp +++ b/src/share/vm/runtime/mutexLocker.hpp @@ -63,9 +63,6 @@ extern Monitor* FullGCCount_lock; // in support of "concurrent" f extern Monitor* CMark_lock; // used for concurrent mark thread coordination extern Monitor* ZF_mon; // used for G1 conc zero-fill. extern Monitor* Cleanup_mon; // used for G1 conc cleanup. -extern Monitor* G1ConcRefine_mon; // used for G1 conc-refine - // coordination. - extern Mutex* SATB_Q_FL_lock; // Protects SATB Q // buffer free list. extern Monitor* SATB_Q_CBL_mon; // Protects SATB Q