提交 26afc937 编写于 作者: J johnc

7143511: G1: Another instance of high GC Worker Other time (50ms)

Summary: Tiered compilation has increased the number of nmethods in the code cache. This has, in turn, significantly increased the number of marked nmethods processed during the StrongRootsScope destructor. Create a specialized version of CodeBlobToOopClosure for G1 which places only those nmethods that contain pointers into the collection set on to the marked nmethods list.
Reviewed-by: iveresov, tonyp
上级 b683d293
......@@ -553,7 +553,7 @@ public:
static void oops_do_marking_prologue();
static void oops_do_marking_epilogue();
static bool oops_do_marking_is_active() { return _oops_do_mark_nmethods != NULL; }
DEBUG_ONLY(bool test_oops_do_mark() { return _oops_do_mark_link != NULL; })
bool test_oops_do_mark() { return _oops_do_mark_link != NULL; }
// ScopeDesc for an instruction
ScopeDesc* scope_desc_at(address pc);
......
......@@ -4675,67 +4675,74 @@ public:
double start_time_ms = os::elapsedTime() * 1000.0;
_g1h->g1_policy()->record_gc_worker_start_time(worker_id, start_time_ms);
ResourceMark rm;
HandleMark hm;
{
ResourceMark rm;
HandleMark hm;
ReferenceProcessor* rp = _g1h->ref_processor_stw();
ReferenceProcessor* rp = _g1h->ref_processor_stw();
G1ParScanThreadState pss(_g1h, worker_id);
G1ParScanHeapEvacClosure scan_evac_cl(_g1h, &pss, rp);
G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss, rp);
G1ParScanThreadState pss(_g1h, worker_id);
G1ParScanHeapEvacClosure scan_evac_cl(_g1h, &pss, rp);
G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss, rp);
pss.set_evac_closure(&scan_evac_cl);
pss.set_evac_failure_closure(&evac_failure_cl);
pss.set_partial_scan_closure(&partial_scan_cl);
pss.set_evac_closure(&scan_evac_cl);
pss.set_evac_failure_closure(&evac_failure_cl);
pss.set_partial_scan_closure(&partial_scan_cl);
G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss, rp);
G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss, rp);
G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss, rp);
G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss, rp);
G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss, rp);
G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss, rp);
G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss, rp);
G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss, rp);
OopClosure* scan_root_cl = &only_scan_root_cl;
OopsInHeapRegionClosure* scan_perm_cl = &only_scan_perm_cl;
OopClosure* scan_root_cl = &only_scan_root_cl;
OopsInHeapRegionClosure* scan_perm_cl = &only_scan_perm_cl;
if (_g1h->g1_policy()->during_initial_mark_pause()) {
// We also need to mark copied objects.
scan_root_cl = &scan_mark_root_cl;
scan_perm_cl = &scan_mark_perm_cl;
}
if (_g1h->g1_policy()->during_initial_mark_pause()) {
// We also need to mark copied objects.
scan_root_cl = &scan_mark_root_cl;
scan_perm_cl = &scan_mark_perm_cl;
}
G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss);
G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss);
pss.start_strong_roots();
_g1h->g1_process_strong_roots(/* not collecting perm */ false,
SharedHeap::SO_AllClasses,
scan_root_cl,
&push_heap_rs_cl,
scan_perm_cl,
worker_id);
pss.end_strong_roots();
pss.start_strong_roots();
_g1h->g1_process_strong_roots(/* not collecting perm */ false,
SharedHeap::SO_AllClasses,
scan_root_cl,
&push_heap_rs_cl,
scan_perm_cl,
worker_id);
pss.end_strong_roots();
{
double start = os::elapsedTime();
G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
evac.do_void();
double elapsed_ms = (os::elapsedTime()-start)*1000.0;
double term_ms = pss.term_time()*1000.0;
_g1h->g1_policy()->record_obj_copy_time(worker_id, elapsed_ms-term_ms);
_g1h->g1_policy()->record_termination(worker_id, term_ms, pss.term_attempts());
}
_g1h->g1_policy()->record_thread_age_table(pss.age_table());
_g1h->update_surviving_young_words(pss.surviving_young_words()+1);
{
double start = os::elapsedTime();
G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
evac.do_void();
double elapsed_ms = (os::elapsedTime()-start)*1000.0;
double term_ms = pss.term_time()*1000.0;
_g1h->g1_policy()->record_obj_copy_time(worker_id, elapsed_ms-term_ms);
_g1h->g1_policy()->record_termination(worker_id, term_ms, pss.term_attempts());
}
_g1h->g1_policy()->record_thread_age_table(pss.age_table());
_g1h->update_surviving_young_words(pss.surviving_young_words()+1);
// Clean up any par-expanded rem sets.
HeapRegionRemSet::par_cleanup();
if (ParallelGCVerbose) {
MutexLocker x(stats_lock());
pss.print_termination_stats(worker_id);
}
// Clean up any par-expanded rem sets.
HeapRegionRemSet::par_cleanup();
assert(pss.refs()->is_empty(), "should be empty");
if (ParallelGCVerbose) {
MutexLocker x(stats_lock());
pss.print_termination_stats(worker_id);
// Close the inner scope so that the ResourceMark and HandleMark
// destructors are executed here and are included as part of the
// "GC Worker Time".
}
assert(pss.refs()->is_empty(), "should be empty");
double end_time_ms = os::elapsedTime() * 1000.0;
_g1h->g1_policy()->record_gc_worker_end_time(worker_id, end_time_ms);
}
......@@ -4743,6 +4750,67 @@ public:
// *** Common G1 Evacuation Stuff
// Closures that support the filtering of CodeBlobs scanned during
// external root scanning.
// Closure applied to reference fields in code blobs (specifically nmethods)
// to determine whether an nmethod contains references that point into
// the collection set. Used as a predicate when walking code roots so
// that only nmethods that point into the collection set are added to the
// 'marked' list.
class G1FilteredCodeBlobToOopClosure : public CodeBlobToOopClosure {
class G1PointsIntoCSOopClosure : public OopClosure {
G1CollectedHeap* _g1;
bool _points_into_cs;
public:
G1PointsIntoCSOopClosure(G1CollectedHeap* g1) :
_g1(g1), _points_into_cs(false) { }
bool points_into_cs() const { return _points_into_cs; }
template <class T>
void do_oop_nv(T* p) {
if (!_points_into_cs) {
T heap_oop = oopDesc::load_heap_oop(p);
if (!oopDesc::is_null(heap_oop) &&
_g1->in_cset_fast_test(oopDesc::decode_heap_oop_not_null(heap_oop))) {
_points_into_cs = true;
}
}
}
virtual void do_oop(oop* p) { do_oop_nv(p); }
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
G1CollectedHeap* _g1;
public:
G1FilteredCodeBlobToOopClosure(G1CollectedHeap* g1, OopClosure* cl) :
CodeBlobToOopClosure(cl, true), _g1(g1) { }
virtual void do_code_blob(CodeBlob* cb) {
nmethod* nm = cb->as_nmethod_or_null();
if (nm != NULL && !(nm->test_oops_do_mark())) {
G1PointsIntoCSOopClosure predicate_cl(_g1);
nm->oops_do(&predicate_cl);
if (predicate_cl.points_into_cs()) {
// At least one of the reference fields or the oop relocations
// in the nmethod points into the collection set. We have to
// 'mark' this nmethod.
// Note: Revisit the following if CodeBlobToOopClosure::do_code_blob()
// or MarkingCodeBlobClosure::do_code_blob() change.
if (!nm->test_set_oops_do_mark()) {
do_newly_marked_nmethod(nm);
}
}
}
}
};
// This method is run in a GC worker.
void
......@@ -4764,7 +4832,7 @@ g1_process_strong_roots(bool collecting_perm_gen,
// Walk the code cache w/o buffering, because StarTask cannot handle
// unaligned oop locations.
CodeBlobToOopClosure eager_scan_code_roots(scan_non_heap_roots, /*do_marking=*/ true);
G1FilteredCodeBlobToOopClosure eager_scan_code_roots(this, scan_non_heap_roots);
process_strong_roots(false, // no scoping; this is parallel code
collecting_perm_gen, so,
......@@ -5378,25 +5446,39 @@ void G1CollectedHeap::evacuate_collection_set() {
rem_set()->prepare_for_younger_refs_iterate(true);
assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty");
double start_par = os::elapsedTime();
double start_par_time_sec = os::elapsedTime();
double end_par_time_sec;
if (G1CollectedHeap::use_parallel_gc_threads()) {
// The individual threads will set their evac-failure closures.
StrongRootsScope srs(this);
if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
// These tasks use ShareHeap::_process_strong_tasks
assert(UseDynamicNumberOfGCThreads ||
workers()->active_workers() == workers()->total_workers(),
"If not dynamic should be using all the workers");
workers()->run_task(&g1_par_task);
} else {
{
StrongRootsScope srs(this);
g1_par_task.set_for_termination(n_workers);
g1_par_task.work(0);
if (G1CollectedHeap::use_parallel_gc_threads()) {
// The individual threads will set their evac-failure closures.
if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
// These tasks use ShareHeap::_process_strong_tasks
assert(UseDynamicNumberOfGCThreads ||
workers()->active_workers() == workers()->total_workers(),
"If not dynamic should be using all the workers");
workers()->run_task(&g1_par_task);
} else {
g1_par_task.set_for_termination(n_workers);
g1_par_task.work(0);
}
end_par_time_sec = os::elapsedTime();
// Closing the inner scope will execute the destructor
// for the StrongRootsScope object. We record the current
// elapsed time before closing the scope so that time
// taken for the SRS destructor is NOT included in the
// reported parallel time.
}
double par_time = (os::elapsedTime() - start_par) * 1000.0;
g1_policy()->record_par_time(par_time);
double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0;
g1_policy()->record_par_time(par_time_ms);
double code_root_fixup_time_ms =
(os::elapsedTime() - end_par_time_sec) * 1000.0;
g1_policy()->record_code_root_fixup_time(code_root_fixup_time_ms);
set_par_threads(0);
......
......@@ -1261,6 +1261,9 @@ void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
other_time_ms -= known_time;
}
// Now subtract the time taken to fix up roots in generated code
other_time_ms -= _cur_collection_code_root_fixup_time_ms;
// Subtract the time taken to clean the card table from the
// current value of "other time"
other_time_ms -= _cur_clear_ct_time_ms;
......@@ -1401,10 +1404,10 @@ void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
print_par_stats(2, "Termination", _par_last_termination_times_ms);
print_par_sizes(3, "Termination Attempts", _par_last_termination_attempts);
print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
for (int i = 0; i < _parallel_gc_threads; i++) {
_par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
_par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] -
_par_last_gc_worker_start_times_ms[i];
double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
_par_last_satb_filtering_times_ms[i] +
......@@ -1413,10 +1416,13 @@ void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
_par_last_obj_copy_times_ms[i] +
_par_last_termination_times_ms[i];
_par_last_gc_worker_other_times_ms[i] = _cur_collection_par_time_ms - worker_known_time;
_par_last_gc_worker_other_times_ms[i] = _par_last_gc_worker_times_ms[i] -
worker_known_time;
}
print_par_stats(2, "GC Worker", _par_last_gc_worker_times_ms);
print_par_stats(2, "GC Worker Other", _par_last_gc_worker_other_times_ms);
print_par_stats(2, "GC Worker Total", _par_last_gc_worker_times_ms);
print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
} else {
print_stats(1, "Ext Root Scanning", ext_root_scan_time);
if (print_marking_info) {
......@@ -1427,6 +1433,7 @@ void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
print_stats(1, "Scan RS", scan_rs_time);
print_stats(1, "Object Copying", obj_copy_time);
}
print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
if (print_marking_info) {
print_stats(1, "Complete CSet Marking", _mark_closure_time_ms);
}
......
......@@ -179,6 +179,9 @@ private:
size_t _cur_collection_pause_used_at_start_bytes;
size_t _cur_collection_pause_used_regions_at_start;
double _cur_collection_par_time_ms;
double _cur_collection_code_root_fixup_time_ms;
double _cur_satb_drain_time_ms;
double _cur_clear_ct_time_ms;
double _cur_ref_proc_time_ms;
......@@ -226,8 +229,8 @@ private:
double* _par_last_gc_worker_times_ms;
// Each workers 'other' time i.e. the elapsed time of the parallel
// phase of the pause minus the sum of the individual sub-phase
// times for a given worker thread.
// code executed by a worker minus the sum of the individual sub-phase
// times for that worker thread.
double* _par_last_gc_worker_other_times_ms;
// indicates whether we are in young or mixed GC mode
......@@ -897,6 +900,10 @@ public:
_cur_collection_par_time_ms = ms;
}
void record_code_root_fixup_time(double ms) {
_cur_collection_code_root_fixup_time_ms = ms;
}
void record_aux_start_time(int i) {
guarantee(i < _aux_num, "should be within range");
_cur_aux_start_times_ms[i] = os::elapsedTime() * 1000.0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册