From 651edb8d6760ae4d3ad22307c1719ce9cba861f8 Mon Sep 17 00:00:00 2001 From: ysr Date: Wed, 13 Jan 2010 15:26:39 -0800 Subject: [PATCH] 6896647: card marks can be deferred too long Summary: Deferred card marks are now flushed during the gc prologue. Parallel[Scavege,OldGC] and SerialGC no longer defer card marks generated by COMPILER2 as a result of ReduceInitialCardMarks. For these cases, introduced a diagnostic option to defer the card marks, only for the purposes of testing and diagnostics. CMS and G1 continue to defer card marks. Potential performance concern related to single-threaded flushing of deferred card marks in the gc prologue will be addressed in the future. Reviewed-by: never, johnc --- .../gc_implementation/g1/g1CollectedHeap.cpp | 1 + .../gc_implementation/g1/g1CollectedHeap.hpp | 4 + .../parallelScavenge/parallelScavengeHeap.cpp | 6 +- .../parallelScavenge/parallelScavengeHeap.hpp | 5 +- src/share/vm/gc_interface/collectedHeap.cpp | 79 +++++++++++-------- src/share/vm/gc_interface/collectedHeap.hpp | 29 +++++-- src/share/vm/memory/genCollectedHeap.cpp | 3 + src/share/vm/memory/genCollectedHeap.hpp | 4 + src/share/vm/opto/graphKit.cpp | 7 +- src/share/vm/opto/runtime.cpp | 8 +- src/share/vm/opto/runtime.hpp | 5 +- src/share/vm/runtime/globals.hpp | 4 + src/share/vm/runtime/thread.cpp | 5 +- src/share/vm/runtime/vmStructs.cpp | 1 + 14 files changed, 107 insertions(+), 54 deletions(-) diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index c60f17680..c3319d13e 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -1441,6 +1441,7 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : } jint G1CollectedHeap::initialize() { + CollectedHeap::pre_initialize(); os::enable_vtime(); // Necessary to satisfy locking discipline assertions. diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp index 3c071beed..bb73eb288 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp @@ -1007,6 +1007,10 @@ public: return true; } + virtual bool card_mark_must_follow_store() const { + return true; + } + bool is_in_young(oop obj) { HeapRegion* hr = heap_region_containing(obj); return hr != NULL && hr->is_young(); diff --git a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp index 8396e7960..9fe57121f 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp @@ -51,6 +51,8 @@ static void trace_gen_sizes(const char* const str, } jint ParallelScavengeHeap::initialize() { + CollectedHeap::pre_initialize(); + // Cannot be initialized until after the flags are parsed GenerationSizer flag_parser; @@ -717,10 +719,6 @@ HeapWord* ParallelScavengeHeap::allocate_new_tlab(size_t size) { return young_gen()->allocate(size, true); } -void ParallelScavengeHeap::fill_all_tlabs(bool retire) { - CollectedHeap::fill_all_tlabs(retire); -} - void ParallelScavengeHeap::accumulate_statistics_all_tlabs() { CollectedHeap::accumulate_statistics_all_tlabs(); } diff --git a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp index 3bf7671b2..46fdcc533 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp @@ -54,7 +54,6 @@ class ParallelScavengeHeap : public CollectedHeap { protected: static inline size_t total_invocations(); HeapWord* allocate_new_tlab(size_t size); - void fill_all_tlabs(bool retire); public: ParallelScavengeHeap() : CollectedHeap() { @@ -191,6 +190,10 @@ class ParallelScavengeHeap : public CollectedHeap { return true; } + virtual bool card_mark_must_follow_store() const { + return false; + } + // Return true if we don't we need a store barrier for // initializing stores to an object at this address. virtual bool can_elide_initializing_store_barrier(oop new_obj); diff --git a/src/share/vm/gc_interface/collectedHeap.cpp b/src/share/vm/gc_interface/collectedHeap.cpp index fb81f9efd..031afd575 100644 --- a/src/share/vm/gc_interface/collectedHeap.cpp +++ b/src/share/vm/gc_interface/collectedHeap.cpp @@ -59,8 +59,18 @@ CollectedHeap::CollectedHeap() PerfDataManager::create_string_variable(SUN_GC, "lastCause", 80, GCCause::to_string(_gc_lastcause), CHECK); } + _defer_initial_card_mark = false; // strengthened by subclass in pre_initialize() below. } +void CollectedHeap::pre_initialize() { + // Used for ReduceInitialCardMarks (when COMPILER2 is used); + // otherwise remains unused. +#ifdef COMPLER2 + _defer_initial_card_mark = ReduceInitialCardMarks && (DeferInitialCardMark || card_mark_must_follow_store()); +#else + assert(_defer_initial_card_mark == false, "Who would set it?"); +#endif +} #ifndef PRODUCT void CollectedHeap::check_for_bad_heap_word_value(HeapWord* addr, size_t size) { @@ -140,12 +150,13 @@ HeapWord* CollectedHeap::allocate_from_tlab_slow(Thread* thread, size_t size) { void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) { MemRegion deferred = thread->deferred_card_mark(); if (!deferred.is_empty()) { + assert(_defer_initial_card_mark, "Otherwise should be empty"); { // Verify that the storage points to a parsable object in heap DEBUG_ONLY(oop old_obj = oop(deferred.start());) assert(is_in(old_obj), "Not in allocated heap"); assert(!can_elide_initializing_store_barrier(old_obj), - "Else should have been filtered in defer_store_barrier()"); + "Else should have been filtered in new_store_pre_barrier()"); assert(!is_in_permanent(old_obj), "Sanity: not expected"); assert(old_obj->is_oop(true), "Not an oop"); assert(old_obj->is_parsable(), "Will not be concurrently parsable"); @@ -174,9 +185,7 @@ void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) { // so long as the card-mark is completed before the next // scavenge. For all these cases, we can do a card mark // at the point at which we do a slow path allocation -// in the old gen. For uniformity, however, we end -// up using the same scheme (see below) for all three -// cases (deferring the card-mark appropriately). +// in the old gen, i.e. in this call. // (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires // in addition that the card-mark for an old gen allocated // object strictly follow any associated initializing stores. @@ -199,12 +208,13 @@ void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) { // but, like in CMS, because of the presence of concurrent refinement // (much like CMS' precleaning), must strictly follow the oop-store. // Thus, using the same protocol for maintaining the intended -// invariants turns out, serendepitously, to be the same for all -// three collectors/heap types above. +// invariants turns out, serendepitously, to be the same for both +// G1 and CMS. // -// For each future collector, this should be reexamined with -// that specific collector in mind. -oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) { +// For any future collector, this code should be reexamined with +// that specific collector in mind, and the documentation above suitably +// extended and updated. +oop CollectedHeap::new_store_pre_barrier(JavaThread* thread, oop new_obj) { // If a previous card-mark was deferred, flush it now. flush_deferred_store_barrier(thread); if (can_elide_initializing_store_barrier(new_obj)) { @@ -212,10 +222,17 @@ oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) { // following the flush above. assert(thread->deferred_card_mark().is_empty(), "Error"); } else { - // Remember info for the newly deferred store barrier - MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size()); - assert(!deferred.is_empty(), "Error"); - thread->set_deferred_card_mark(deferred); + MemRegion mr((HeapWord*)new_obj, new_obj->size()); + assert(!mr.is_empty(), "Error"); + if (_defer_initial_card_mark) { + // Defer the card mark + thread->set_deferred_card_mark(mr); + } else { + // Do the card mark + BarrierSet* bs = barrier_set(); + assert(bs->has_write_region_opt(), "No write_region() on BarrierSet"); + bs->write_region(mr); + } } return new_obj; } @@ -313,22 +330,6 @@ HeapWord* CollectedHeap::allocate_new_tlab(size_t size) { return NULL; } -void CollectedHeap::fill_all_tlabs(bool retire) { - assert(UseTLAB, "should not reach here"); - // See note in ensure_parsability() below. - assert(SafepointSynchronize::is_at_safepoint() || - !is_init_completed(), - "should only fill tlabs at safepoint"); - // The main thread starts allocating via a TLAB even before it - // has added itself to the threads list at vm boot-up. - assert(Threads::first() != NULL, - "Attempt to fill tlabs before main thread has been added" - " to threads list is doomed to failure!"); - for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) { - thread->tlab().make_parsable(retire); - } -} - void CollectedHeap::ensure_parsability(bool retire_tlabs) { // The second disjunct in the assertion below makes a concession // for the start-up verification done while the VM is being @@ -343,8 +344,24 @@ void CollectedHeap::ensure_parsability(bool retire_tlabs) { "Should only be called at a safepoint or at start-up" " otherwise concurrent mutator activity may make heap " " unparsable again"); - if (UseTLAB) { - fill_all_tlabs(retire_tlabs); + const bool use_tlab = UseTLAB; + const bool deferred = _defer_initial_card_mark; + // The main thread starts allocating via a TLAB even before it + // has added itself to the threads list at vm boot-up. + assert(!use_tlab || Threads::first() != NULL, + "Attempt to fill tlabs before main thread has been added" + " to threads list is doomed to failure!"); + for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) { + if (use_tlab) thread->tlab().make_parsable(retire_tlabs); +#ifdef COMPILER2 + // The deferred store barriers must all have been flushed to the + // card-table (or other remembered set structure) before GC starts + // processing the card-table (or other remembered set). + if (deferred) flush_deferred_store_barrier(thread); +#else + assert(!deferred, "Should be false"); + assert(thread->deferred_card_mark().is_empty(), "Should be empty"); +#endif } } diff --git a/src/share/vm/gc_interface/collectedHeap.hpp b/src/share/vm/gc_interface/collectedHeap.hpp index 19c67bbcd..2bc210a47 100644 --- a/src/share/vm/gc_interface/collectedHeap.hpp +++ b/src/share/vm/gc_interface/collectedHeap.hpp @@ -51,6 +51,9 @@ class CollectedHeap : public CHeapObj { // Used for filler objects (static, but initialized in ctor). static size_t _filler_array_max_size; + // Used in support of ReduceInitialCardMarks; only consulted if COMPILER2 is being used + bool _defer_initial_card_mark; + protected: MemRegion _reserved; BarrierSet* _barrier_set; @@ -70,13 +73,16 @@ class CollectedHeap : public CHeapObj { // Constructor CollectedHeap(); + // Do common initializations that must follow instance construction, + // for example, those needing virtual calls. + // This code could perhaps be moved into initialize() but would + // be slightly more awkward because we want the latter to be a + // pure virtual. + void pre_initialize(); + // Create a new tlab virtual HeapWord* allocate_new_tlab(size_t size); - // Fix up tlabs to make the heap well-formed again, - // optionally retiring the tlabs. - virtual void fill_all_tlabs(bool retire); - // Accumulate statistics on all tlabs. virtual void accumulate_statistics_all_tlabs(); @@ -431,14 +437,25 @@ class CollectedHeap : public CHeapObj { // promises to call this function on such a slow-path-allocated // object before performing initializations that have elided // store barriers. Returns new_obj, or maybe a safer copy thereof. - virtual oop defer_store_barrier(JavaThread* thread, oop new_obj); + virtual oop new_store_pre_barrier(JavaThread* thread, oop new_obj); // Answers whether an initializing store to a new object currently - // allocated at the given address doesn't need a (deferred) store + // allocated at the given address doesn't need a store // barrier. Returns "true" if it doesn't need an initializing // store barrier; answers "false" if it does. virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0; + // If a compiler is eliding store barriers for TLAB-allocated objects, + // we will be informed of a slow-path allocation by a call + // to new_store_pre_barrier() above. Such a call precedes the + // initialization of the object itself, and no post-store-barriers will + // be issued. Some heap types require that the barrier strictly follows + // the initializing stores. (This is currently implemented by deferring the + // barrier until the next slow-path allocation or gc-related safepoint.) + // This interface answers whether a particular heap type needs the card + // mark to be thus strictly sequenced after the stores. + virtual bool card_mark_must_follow_store() const = 0; + // If the CollectedHeap was asked to defer a store barrier above, // this informs it to flush such a deferred store barrier to the // remembered set. diff --git a/src/share/vm/memory/genCollectedHeap.cpp b/src/share/vm/memory/genCollectedHeap.cpp index 5bff15970..91398d9e2 100644 --- a/src/share/vm/memory/genCollectedHeap.cpp +++ b/src/share/vm/memory/genCollectedHeap.cpp @@ -51,6 +51,8 @@ GenCollectedHeap::GenCollectedHeap(GenCollectorPolicy *policy) : } jint GenCollectedHeap::initialize() { + CollectedHeap::pre_initialize(); + int i; _n_gens = gen_policy()->number_of_generations(); @@ -129,6 +131,7 @@ jint GenCollectedHeap::initialize() { _rem_set = collector_policy()->create_rem_set(_reserved, n_covered_regions); set_barrier_set(rem_set()->bs()); + _gch = this; for (i = 0; i < _n_gens; i++) { diff --git a/src/share/vm/memory/genCollectedHeap.hpp b/src/share/vm/memory/genCollectedHeap.hpp index 9004e0d84..8295d078b 100644 --- a/src/share/vm/memory/genCollectedHeap.hpp +++ b/src/share/vm/memory/genCollectedHeap.hpp @@ -260,6 +260,10 @@ public: return true; } + virtual bool card_mark_must_follow_store() const { + return UseConcMarkSweepGC; + } + // We don't need barriers for stores to objects in the // young gen and, a fortiori, for initializing stores to // objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS} diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp index 4cc6128cd..57fea6480 100644 --- a/src/share/vm/opto/graphKit.cpp +++ b/src/share/vm/opto/graphKit.cpp @@ -3259,9 +3259,10 @@ void GraphKit::write_barrier_post(Node* oop_store, if (use_ReduceInitialCardMarks() && obj == just_allocated_object(control())) { // We can skip marks on a freshly-allocated object in Eden. - // Keep this code in sync with maybe_defer_card_mark() in runtime.cpp. - // That routine informs GC to take appropriate compensating steps - // so as to make this card-mark elision safe. + // Keep this code in sync with new_store_pre_barrier() in runtime.cpp. + // That routine informs GC to take appropriate compensating steps, + // upon a slow-path allocation, so as to make this card-mark + // elision safe. return; } diff --git a/src/share/vm/opto/runtime.cpp b/src/share/vm/opto/runtime.cpp index dc08d39e5..f0d0c2170 100644 --- a/src/share/vm/opto/runtime.cpp +++ b/src/share/vm/opto/runtime.cpp @@ -143,7 +143,7 @@ const char* OptoRuntime::stub_name(address entry) { // We failed the fast-path allocation. Now we need to do a scavenge or GC // and try allocation again. -void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) { +void OptoRuntime::new_store_pre_barrier(JavaThread* thread) { // After any safepoint, just before going back to compiled code, // we inform the GC that we will be doing initializing writes to // this object in the future without emitting card-marks, so @@ -156,7 +156,7 @@ void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) { assert(Universe::heap()->can_elide_tlab_store_barriers(), "compiler must check this first"); // GC may decide to give back a safer copy of new_obj. - new_obj = Universe::heap()->defer_store_barrier(thread, new_obj); + new_obj = Universe::heap()->new_store_pre_barrier(thread, new_obj); thread->set_vm_result(new_obj); } @@ -200,7 +200,7 @@ JRT_BLOCK_ENTRY(void, OptoRuntime::new_instance_C(klassOopDesc* klass, JavaThrea if (GraphKit::use_ReduceInitialCardMarks()) { // inform GC that we won't do card marks for initializing writes. - maybe_defer_card_mark(thread); + new_store_pre_barrier(thread); } JRT_END @@ -239,7 +239,7 @@ JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_C(klassOopDesc* array_type, int len if (GraphKit::use_ReduceInitialCardMarks()) { // inform GC that we won't do card marks for initializing writes. - maybe_defer_card_mark(thread); + new_store_pre_barrier(thread); } JRT_END diff --git a/src/share/vm/opto/runtime.hpp b/src/share/vm/opto/runtime.hpp index c3d8238ae..2c0c49880 100644 --- a/src/share/vm/opto/runtime.hpp +++ b/src/share/vm/opto/runtime.hpp @@ -133,8 +133,9 @@ class OptoRuntime : public AllStatic { // Allocate storage for a objArray or typeArray static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread); - // Post-slow-path-allocation step for implementing ReduceInitialCardMarks: - static void maybe_defer_card_mark(JavaThread* thread); + // Post-slow-path-allocation, pre-initializing-stores step for + // implementing ReduceInitialCardMarks + static void new_store_pre_barrier(JavaThread* thread); // Allocate storage for a multi-dimensional arrays // Note: needs to be fixed for arbitrary number of dimensions diff --git a/src/share/vm/runtime/globals.hpp b/src/share/vm/runtime/globals.hpp index 536429bdd..1da42283e 100644 --- a/src/share/vm/runtime/globals.hpp +++ b/src/share/vm/runtime/globals.hpp @@ -2015,6 +2015,10 @@ class CommandLineFlags { diagnostic(bool, GCParallelVerificationEnabled, true, \ "Enable parallel memory system verification") \ \ + diagnostic(bool, DeferInitialCardMark, false, \ + "When +ReduceInitialCardMarks, explicitly defer any that " \ + "may arise from new_pre_store_barrier") \ + \ diagnostic(bool, VerifyRememberedSets, false, \ "Verify GC remembered sets") \ \ diff --git a/src/share/vm/runtime/thread.cpp b/src/share/vm/runtime/thread.cpp index e347664fc..db90e0ef5 100644 --- a/src/share/vm/runtime/thread.cpp +++ b/src/share/vm/runtime/thread.cpp @@ -2357,9 +2357,8 @@ public: }; void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) { - // Flush deferred store-barriers, if any, associated with - // initializing stores done by this JavaThread in the current epoch. - Universe::heap()->flush_deferred_store_barrier(this); + // Verify that the deferred card marks have been flushed. + assert(deferred_card_mark().is_empty(), "Should be empty during GC"); // The ThreadProfiler oops_do is done from FlatProfiler::oops_do // since there may be more than one thread using each ThreadProfiler. diff --git a/src/share/vm/runtime/vmStructs.cpp b/src/share/vm/runtime/vmStructs.cpp index b355d87dc..41491bac2 100644 --- a/src/share/vm/runtime/vmStructs.cpp +++ b/src/share/vm/runtime/vmStructs.cpp @@ -309,6 +309,7 @@ static inline uint64_t cast_uint64_t(size_t x) nonstatic_field(CollectedHeap, _reserved, MemRegion) \ nonstatic_field(SharedHeap, _perm_gen, PermGen*) \ nonstatic_field(CollectedHeap, _barrier_set, BarrierSet*) \ + nonstatic_field(CollectedHeap, _defer_initial_card_mark, bool) \ nonstatic_field(CollectedHeap, _is_gc_active, bool) \ nonstatic_field(CompactibleSpace, _compaction_top, HeapWord*) \ nonstatic_field(CompactibleSpace, _first_dead, HeapWord*) \ -- GitLab