From 1498a56a2aeb2f6b8398416c44d6636a1216f329 Mon Sep 17 00:00:00 2001 From: tonyp Date: Tue, 25 Jan 2011 17:58:19 -0500 Subject: [PATCH] 7014261: G1: RSet-related failures Summary: A race between the concurrent cleanup thread and the VM thread while it is processing the "expanded sparse table list" causes both threads to try to free the same sparse table entry and either causes one of the threads to fail or leaves the entry in an inconsistent state. The solution is purge all entries on the expanded list that correspond go regions that are being cleaned up. Reviewed-by: brutisso, johnc --- .../gc_implementation/g1/concurrentMark.cpp | 54 +++++++++++-------- .../gc_implementation/g1/g1CollectedHeap.cpp | 5 +- .../gc_implementation/g1/g1CollectedHeap.hpp | 12 +++-- .../gc_implementation/g1/heapRegionRemSet.cpp | 21 +++++++- .../gc_implementation/g1/heapRegionRemSet.hpp | 17 ++++-- .../vm/gc_implementation/g1/sparsePRT.cpp | 47 +++++++++++++++- .../vm/gc_implementation/g1/sparsePRT.hpp | 51 +++++++++++++++++- 7 files changed, 171 insertions(+), 36 deletions(-) diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/src/share/vm/gc_implementation/g1/concurrentMark.cpp index e082c1b21..7744f59f3 100644 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp @@ -1500,21 +1500,19 @@ class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { size_t _max_live_bytes; size_t _regions_claimed; size_t _freed_bytes; - FreeRegionList _local_cleanup_list; - HumongousRegionSet _humongous_proxy_set; + FreeRegionList* _local_cleanup_list; + HumongousRegionSet* _humongous_proxy_set; + HRRSCleanupTask* _hrrs_cleanup_task; double _claimed_region_time; double _max_region_time; public: G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, - int worker_num); + int worker_num, + FreeRegionList* local_cleanup_list, + HumongousRegionSet* humongous_proxy_set, + HRRSCleanupTask* hrrs_cleanup_task); size_t freed_bytes() { return _freed_bytes; } - FreeRegionList* local_cleanup_list() { - return &_local_cleanup_list; - } - HumongousRegionSet* humongous_proxy_set() { - return &_humongous_proxy_set; - } bool doHeapRegion(HeapRegion *r); @@ -1541,7 +1539,12 @@ public: void work(int i) { double start = os::elapsedTime(); - G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i); + FreeRegionList local_cleanup_list("Local Cleanup List"); + HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); + HRRSCleanupTask hrrs_cleanup_task; + G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list, + &humongous_proxy_set, + &hrrs_cleanup_task); if (G1CollectedHeap::use_parallel_gc_threads()) { _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, HeapRegion::NoteEndClaimValue); @@ -1553,15 +1556,17 @@ public: // Now update the lists _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), NULL /* free_list */, - g1_note_end.humongous_proxy_set(), + &humongous_proxy_set, true /* par */); { MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); _max_live_bytes += g1_note_end.max_live_bytes(); _freed_bytes += g1_note_end.freed_bytes(); - _cleanup_list->add_as_tail(g1_note_end.local_cleanup_list()); - assert(g1_note_end.local_cleanup_list()->is_empty(), "post-condition"); + _cleanup_list->add_as_tail(&local_cleanup_list); + assert(local_cleanup_list.is_empty(), "post-condition"); + + HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); } double end = os::elapsedTime(); if (G1PrintParCleanupStats) { @@ -1602,13 +1607,17 @@ public: G1NoteEndOfConcMarkClosure:: G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, - int worker_num) + int worker_num, + FreeRegionList* local_cleanup_list, + HumongousRegionSet* humongous_proxy_set, + HRRSCleanupTask* hrrs_cleanup_task) : _g1(g1), _worker_num(worker_num), _max_live_bytes(0), _regions_claimed(0), _freed_bytes(0), _claimed_region_time(0.0), _max_region_time(0.0), - _local_cleanup_list("Local Cleanup List"), - _humongous_proxy_set("Local Cleanup Humongous Proxy Set") { } + _local_cleanup_list(local_cleanup_list), + _humongous_proxy_set(humongous_proxy_set), + _hrrs_cleanup_task(hrrs_cleanup_task) { } bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) { // We use a claim value of zero here because all regions @@ -1619,11 +1628,12 @@ bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) { _regions_claimed++; hr->note_end_of_marking(); _max_live_bytes += hr->max_live_bytes(); - _g1->free_region_if_totally_empty(hr, - &_freed_bytes, - &_local_cleanup_list, - &_humongous_proxy_set, - true /* par */); + _g1->free_region_if_empty(hr, + &_freed_bytes, + _local_cleanup_list, + _humongous_proxy_set, + _hrrs_cleanup_task, + true /* par */); double region_time = (os::elapsedTime() - start); _claimed_region_time += region_time; if (region_time > _max_region_time) _max_region_time = region_time; @@ -1659,6 +1669,8 @@ void ConcurrentMark::cleanup() { double start = os::elapsedTime(); + HeapRegionRemSet::reset_for_cleanup_tasks(); + // Do counting once more with the world stopped for good measure. G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), &_region_bm, &_card_bm); diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index db8613553..3d3522461 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -4925,10 +4925,11 @@ void G1CollectedHeap::evacuate_collection_set() { COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); } -void G1CollectedHeap::free_region_if_totally_empty(HeapRegion* hr, +void G1CollectedHeap::free_region_if_empty(HeapRegion* hr, size_t* pre_used, FreeRegionList* free_list, HumongousRegionSet* humongous_proxy_set, + HRRSCleanupTask* hrrs_cleanup_task, bool par) { if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { if (hr->isHumongous()) { @@ -4937,6 +4938,8 @@ void G1CollectedHeap::free_region_if_totally_empty(HeapRegion* hr, } else { free_region(hr, pre_used, free_list, par); } + } else { + hr->rem_set()->do_cleanup_work(hrrs_cleanup_task); } } diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp index 709dbd628..4bad5fd43 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp @@ -40,6 +40,7 @@ class HeapRegion; class HeapRegionSeq; +class HRRSCleanupTask; class PermanentGenerationSpec; class GenerationSpec; class OopsInHeapRegionClosure; @@ -1099,11 +1100,12 @@ public: // all dead. It calls either free_region() or // free_humongous_region() depending on the type of the region that // is passed to it. - void free_region_if_totally_empty(HeapRegion* hr, - size_t* pre_used, - FreeRegionList* free_list, - HumongousRegionSet* humongous_proxy_set, - bool par); + void free_region_if_empty(HeapRegion* hr, + size_t* pre_used, + FreeRegionList* free_list, + HumongousRegionSet* humongous_proxy_set, + HRRSCleanupTask* hrrs_cleanup_task, + bool par); // It appends the free list to the master free list and updates the // master humongous list according to the contents of the proxy diff --git a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp index dde8255cd..8e498ea4a 100644 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -463,7 +463,6 @@ public: } static void par_contract_all(); - }; void PosParPRT::par_contract_all() { @@ -1070,6 +1069,11 @@ bool OtherRegionsTable::contains_reference_locked(OopOrNarrowOopStar from) const } +void +OtherRegionsTable::do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task) { + _sparse_table.do_cleanup_work(hrrs_cleanup_task); +} + // Determines how many threads can add records to an rset in parallel. // This can be done by either mutator threads together with the // concurrent refinement threads or GC threads. @@ -1384,6 +1388,19 @@ void HeapRegionRemSet::print_recorded() { } } +void HeapRegionRemSet::reset_for_cleanup_tasks() { + SparsePRT::reset_for_cleanup_tasks(); +} + +void HeapRegionRemSet::do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task) { + _other_regions.do_cleanup_work(hrrs_cleanup_task); +} + +void +HeapRegionRemSet::finish_cleanup_task(HRRSCleanupTask* hrrs_cleanup_task) { + SparsePRT::finish_cleanup_task(hrrs_cleanup_task); +} + #ifndef PRODUCT void HeapRegionRemSet::test() { os::sleep(Thread::current(), (jlong)5000, false); diff --git a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp index 62378db57..3f0169bb3 100644 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,6 +38,10 @@ class HeapRegionRemSetIterator; class PosParPRT; class SparsePRT; +// Essentially a wrapper around SparsePRTCleanupTask. See +// sparsePRT.hpp for more details. +class HRRSCleanupTask : public SparsePRTCleanupTask { +}; // The "_coarse_map" is a bitmap with one bit for each region, where set // bits indicate that the corresponding region may contain some pointer @@ -156,6 +160,8 @@ public: // "from_hr" is being cleared; remove any entries from it. void clear_incoming_entry(HeapRegion* from_hr); + void do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task); + // Declare the heap size (in # of regions) to the OtherRegionsTable. // (Uses it to initialize from_card_cache). static void init_from_card_cache(size_t max_regions); @@ -165,10 +171,8 @@ public: static void shrink_from_card_cache(size_t new_n_regs); static void print_from_card_cache(); - }; - class HeapRegionRemSet : public CHeapObj { friend class VMStructs; friend class HeapRegionRemSetIterator; @@ -342,11 +346,16 @@ public: static void print_recorded(); static void record_event(Event evnt); + // These are wrappers for the similarly-named methods on + // SparsePRT. Look at sparsePRT.hpp for more details. + static void reset_for_cleanup_tasks(); + void do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task); + static void finish_cleanup_task(HRRSCleanupTask* hrrs_cleanup_task); + // Run unit tests. #ifndef PRODUCT static void test(); #endif - }; class HeapRegionRemSetIterator : public CHeapObj { diff --git a/src/share/vm/gc_implementation/g1/sparsePRT.cpp b/src/share/vm/gc_implementation/g1/sparsePRT.cpp index 042ca86c6..156bc6981 100644 --- a/src/share/vm/gc_implementation/g1/sparsePRT.cpp +++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -415,6 +415,38 @@ SparsePRT* SparsePRT::get_from_expanded_list() { return NULL; } +void SparsePRT::reset_for_cleanup_tasks() { + _head_expanded_list = NULL; +} + +void SparsePRT::do_cleanup_work(SparsePRTCleanupTask* sprt_cleanup_task) { + if (should_be_on_expanded_list()) { + sprt_cleanup_task->add(this); + } +} + +void SparsePRT::finish_cleanup_task(SparsePRTCleanupTask* sprt_cleanup_task) { + assert(ParGCRareEvent_lock->owned_by_self(), "pre-condition"); + SparsePRT* head = sprt_cleanup_task->head(); + SparsePRT* tail = sprt_cleanup_task->tail(); + if (head != NULL) { + assert(tail != NULL, "if head is not NULL, so should tail"); + + tail->set_next_expanded(_head_expanded_list); + _head_expanded_list = head; + } else { + assert(tail == NULL, "if head is NULL, so should tail"); + } +} + +bool SparsePRT::should_be_on_expanded_list() { + if (_expanded) { + assert(_cur != _next, "if _expanded is true, cur should be != _next"); + } else { + assert(_cur == _next, "if _expanded is false, cur should be == _next"); + } + return expanded(); +} void SparsePRT::cleanup_all() { // First clean up all expanded tables so they agree on next and cur. @@ -484,6 +516,7 @@ void SparsePRT::clear() { _cur->clear(); } _next = _cur; + _expanded = false; } void SparsePRT::cleanup() { @@ -518,3 +551,15 @@ void SparsePRT::expand() { } add_to_expanded_list(this); } + +void SparsePRTCleanupTask::add(SparsePRT* sprt) { + assert(sprt->should_be_on_expanded_list(), "pre-condition"); + + sprt->set_next_expanded(NULL); + if (_tail != NULL) { + _tail->set_next_expanded(sprt); + } else { + _head = sprt; + } + _tail = sprt; +} diff --git a/src/share/vm/gc_implementation/g1/sparsePRT.hpp b/src/share/vm/gc_implementation/g1/sparsePRT.hpp index 86e1789db..678008679 100644 --- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp +++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -212,8 +212,11 @@ public: // mutex. class SparsePRTIter; +class SparsePRTCleanupTask; class SparsePRT VALUE_OBJ_CLASS_SPEC { + friend class SparsePRTCleanupTask; + // Iterations are done on the _cur hash table, since they only need to // see entries visible at the start of a collection pause. // All other operations are done using the _next hash table. @@ -238,6 +241,8 @@ class SparsePRT VALUE_OBJ_CLASS_SPEC { SparsePRT* next_expanded() { return _next_expanded; } void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; } + bool should_be_on_expanded_list(); + static SparsePRT* _head_expanded_list; public: @@ -284,12 +289,36 @@ public: static void add_to_expanded_list(SparsePRT* sprt); static SparsePRT* get_from_expanded_list(); + // The purpose of these three methods is to help the GC workers + // during the cleanup pause to recreate the expanded list, purging + // any tables from it that belong to regions that are freed during + // cleanup (if we don't purge those tables, there is a race that + // causes various crashes; see CR 7014261). + // + // We chose to recreate the expanded list, instead of purging + // entries from it by iterating over it, to avoid this serial phase + // at the end of the cleanup pause. + // + // The three methods below work as follows: + // * reset_for_cleanup_tasks() : Nulls the expanded list head at the + // start of the cleanup pause. + // * do_cleanup_work() : Called by the cleanup workers for every + // region that is not free / is being freed by the cleanup + // pause. It creates a list of expanded tables whose head / tail + // are on the thread-local SparsePRTCleanupTask object. + // * finish_cleanup_task() : Called by the cleanup workers after + // they complete their cleanup task. It adds the local list into + // the global expanded list. It assumes that the + // ParGCRareEvent_lock is being held to ensure MT-safety. + static void reset_for_cleanup_tasks(); + void do_cleanup_work(SparsePRTCleanupTask* sprt_cleanup_task); + static void finish_cleanup_task(SparsePRTCleanupTask* sprt_cleanup_task); + bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const { return _next->contains_card(region_id, card_index); } }; - class SparsePRTIter: public RSHashTableIter { public: void init(const SparsePRT* sprt) { @@ -300,4 +329,22 @@ public: } }; +// This allows each worker during a cleanup pause to create a +// thread-local list of sparse tables that have been expanded and need +// to be processed at the beginning of the next GC pause. This lists +// are concatenated into the single expanded list at the end of the +// cleanup pause. +class SparsePRTCleanupTask VALUE_OBJ_CLASS_SPEC { +private: + SparsePRT* _head; + SparsePRT* _tail; + +public: + SparsePRTCleanupTask() : _head(NULL), _tail(NULL) { } + + void add(SparsePRT* sprt); + SparsePRT* head() { return _head; } + SparsePRT* tail() { return _tail; } +}; + #endif // SHARE_VM_GC_IMPLEMENTATION_G1_SPARSEPRT_HPP -- GitLab