Commit 349bfec0 authored by ysr

6888898: CMS: ReduceInitialCardMarks unsafe in the presence of CMS precleaning

6889757: G1: enable card mark elision for initializing writes from compiled code (ReduceInitialCardMarks)
Summary: Defer the (compiler-elided) card-mark upon a slow-path allocation until after the initializing stores and before the next safepoint; G1 now answers yes to can_elide_tlab_store_barriers().
Reviewed-by: jcoomes, kvn, never
Parent 77eca09d
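The summary describes a three-step protocol: compiled code elides the card-mark on the fast path, the slow-path allocation records the new object's extent as a deferred card-mark, and the deferral is flushed after the initializing stores but before the next safepoint. The standalone sketch below models that lifecycle; `MemRegion`, `CardTable`, and `MockThread` are toy stand-ins for the HotSpot classes, with a byte-addressed card table assumed for simplicity.

```cpp
// Standalone model of the deferred card-mark protocol (illustrative only).
#include <cassert>
#include <cstdint>
#include <vector>

struct MemRegion {
  uintptr_t start, end;                       // [start, end), in bytes
  MemRegion(uintptr_t s = 0, uintptr_t e = 0) : start(s), end(e) {}
  bool is_empty() const { return start == end; }
};

struct CardTable {
  static const uintptr_t card_size = 512;
  std::vector<uint8_t> cards;
  explicit CardTable(uintptr_t heap_bytes) : cards(heap_bytes / card_size, 0) {}
  // Dirty every card overlapping the region (HotSpot's write_region()).
  void write_region(MemRegion mr) {
    for (uintptr_t c = mr.start / card_size; c <= (mr.end - 1) / card_size; ++c)
      cards[c] = 1;
  }
};

struct MockThread {
  MemRegion deferred_card_mark;               // analogous to _deferred_card_mark
};

// Called before the next safepoint-like event and before the next slow-path
// allocation: performs the card-mark that compiled code elided. By this
// point the object's initializing stores have completed.
void flush_deferred_store_barrier(MockThread* t, CardTable* ct) {
  if (!t->deferred_card_mark.is_empty()) {
    ct->write_region(t->deferred_card_mark);
    t->deferred_card_mark = MemRegion();
  }
}

// Called on the allocation slow path when the compiler has elided the
// initializing card-marks for the new object.
void defer_store_barrier(MockThread* t, CardTable* ct,
                         MemRegion new_obj, bool in_young) {
  flush_deferred_store_barrier(t, ct);        // at most one deferral pending
  if (!in_young) t->deferred_card_mark = new_obj;  // young needs no mark
}

int main() {
  CardTable ct(1 << 20);
  MockThread t;
  defer_store_barrier(&t, &ct, MemRegion(4096, 4160), /*in_young=*/false);
  // ... compiled code performs the initializing stores here ...
  flush_deferred_store_barrier(&t, &ct);      // before the next safepoint
  assert(ct.cards[4096 / CardTable::card_size] == 1);
  return 0;
}
```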
@@ -992,11 +992,39 @@ public:
   // Can a compiler initialize a new object without store barriers?
   // This permission only extends from the creation of a new object
-  // via a TLAB up to the first subsequent safepoint.
+  // via a TLAB up to the first subsequent safepoint. If such permission
+  // is granted for this heap type, the compiler promises to call
+  // defer_store_barrier() below on any slow path allocation of
+  // a new object for which such initializing store barriers will
+  // have been elided. G1, like CMS, allows this, but should be
+  // ready to provide a compensating write barrier as necessary
+  // if that storage came out of a non-young region. The efficiency
+  // of this implementation depends crucially on being able to
+  // answer, in constant time, whether a piece of storage in the
+  // heap comes from a young region or not.
+  // See ReduceInitialCardMarks.
   virtual bool can_elide_tlab_store_barriers() const {
-    // Since G1's TLAB's may, on occasion, come from non-young regions
-    // as well. (Is there a flag controlling that? XXX)
-    return false;
+    return true;
+  }
+
+  bool is_in_young(oop obj) {
+    HeapRegion* hr = heap_region_containing(obj);
+    return hr != NULL && hr->is_young();
   }
+
+  // We don't need barriers for initializing stores to objects
+  // in the young gen: for the SATB pre-barrier, there is no
+  // pre-value that needs to be remembered; for the remembered-set
+  // update logging post-barrier, we don't maintain remembered set
+  // information for young gen objects. Note that non-generational
+  // G1 does not have any "young" objects, should not elide
+  // the rs logging barrier, and so should always answer false below.
+  // However, non-generational G1 (-XX:-G1Gen) appears to have
+  // bit-rotted, so it was not tested below.
+  virtual bool can_elide_initializing_store_barrier(oop new_obj) {
+    assert(G1Gen || !is_in_young(new_obj),
+           "Non-generational G1 should never return true below");
+    return is_in_young(new_obj);
+  }

   // Can a compiler elide a store barrier when it writes
......
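The comment above leans on the young-region test being constant time. In a region-based heap that falls out of fixed-size regions: subtract the heap base, shift by the region-size log, and index a flat array of region descriptors. A toy sketch under those assumptions (region size, field names, and layout are invented for illustration, not G1's actual data structures):

```cpp
// Toy O(1) "is this address in a young region?" lookup: fixed-size regions
// plus a flat array of region descriptors.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct Region { bool young = false; };

struct ToyRegionHeap {
  static const unsigned log_region_bytes = 20;  // 1 MiB regions (illustrative)
  uintptr_t base;
  std::vector<Region> regions;

  ToyRegionHeap(uintptr_t base_, size_t n) : base(base_), regions(n) {}

  // Constant time: one subtract, one shift, one bounds check, one load.
  Region* region_containing(uintptr_t addr) {
    size_t idx = (addr - base) >> log_region_bytes;
    return idx < regions.size() ? &regions[idx] : nullptr;
  }
  bool is_in_young(uintptr_t addr) {
    Region* r = region_containing(addr);
    return r != nullptr && r->young;
  }
};

int main() {
  ToyRegionHeap heap(0x10000000, 8);
  heap.regions[3].young = true;                 // pretend region 3 is young
  assert(heap.is_in_young(0x10000000 + 3 * (1u << 20) + 64));
  assert(!heap.is_in_young(0x10000000 + 5 * (1u << 20)));
  return 0;
}
```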
@@ -314,41 +314,6 @@ bool ParallelScavengeHeap::is_in_reserved(const void* p) const {
   return false;
 }

-// Static method
-bool ParallelScavengeHeap::is_in_young(oop* p) {
-  ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
-  assert(heap->kind() == CollectedHeap::ParallelScavengeHeap,
-         "Must be ParallelScavengeHeap");
-
-  PSYoungGen* young_gen = heap->young_gen();
-
-  if (young_gen->is_in_reserved(p)) {
-    return true;
-  }
-
-  return false;
-}
-
-// Static method
-bool ParallelScavengeHeap::is_in_old_or_perm(oop* p) {
-  ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
-  assert(heap->kind() == CollectedHeap::ParallelScavengeHeap,
-         "Must be ParallelScavengeHeap");
-
-  PSOldGen* old_gen = heap->old_gen();
-  PSPermGen* perm_gen = heap->perm_gen();
-
-  if (old_gen->is_in_reserved(p)) {
-    return true;
-  }
-
-  if (perm_gen->is_in_reserved(p)) {
-    return true;
-  }
-
-  return false;
-}
-
 // There are two levels of allocation policy here.
 //
 // When an allocation request fails, the requesting thread must invoke a VM

@@ -764,6 +729,13 @@ void ParallelScavengeHeap::resize_all_tlabs() {
   CollectedHeap::resize_all_tlabs();
 }

+bool ParallelScavengeHeap::can_elide_initializing_store_barrier(oop new_obj) {
+  // We don't need barriers for stores to objects in the
+  // young gen and, a fortiori, for initializing stores to
+  // objects therein.
+  return is_in_young(new_obj);
+}
+
 // This method is used by System.gc() and JVMTI.
 void ParallelScavengeHeap::collect(GCCause::Cause cause) {
   assert(!Heap_lock->owned_by_self(),
......
@@ -129,8 +129,8 @@ class ParallelScavengeHeap : public CollectedHeap {
     return perm_gen()->is_in(p);
   }

-  static bool is_in_young(oop *p);        // reserved part
-  static bool is_in_old_or_perm(oop *p);  // reserved part
+  inline bool is_in_young(oop p);         // reserved part
+  inline bool is_in_old_or_perm(oop p);   // reserved part

   // Memory allocation. "gc_time_limit_was_exceeded" will
   // be set to true if the adaptive size policy determines that

@@ -191,6 +191,10 @@ class ParallelScavengeHeap : public CollectedHeap {
     return true;
   }

+  // Returns true if we don't need a store barrier for
+  // initializing stores to an object at this address.
+  virtual bool can_elide_initializing_store_barrier(oop new_obj);
+
   // Can a compiler elide a store barrier when it writes
   // a permanent oop into the heap? Applies when the compiler
   // is storing x to the heap, where x->is_perm() is true.
......
@@ -41,3 +41,11 @@ inline void ParallelScavengeHeap::invoke_full_gc(bool maximum_compaction)
     PSMarkSweep::invoke(maximum_compaction);
   }
 }
+
+inline bool ParallelScavengeHeap::is_in_young(oop p) {
+  return young_gen()->is_in_reserved(p);
+}
+
+inline bool ParallelScavengeHeap::is_in_old_or_perm(oop p) {
+  return old_gen()->is_in_reserved(p) || perm_gen()->is_in_reserved(p);
+}
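With a contiguous young generation, the same query collapses to one or two pointer-range tests against the generations' reserved spans, which is what the inline definitions above do. A hedged standalone sketch of that shape (the spans and type names are invented for illustration):

```cpp
// Range-test version of the young/old queries for contiguous generations
// (toy spans; stands in for young_gen()->is_in_reserved(p) and friends).
#include <cassert>
#include <cstdint>

struct ToySpan {
  uintptr_t lo, hi;                             // reserved range [lo, hi)
  bool contains(uintptr_t p) const { return lo <= p && p < hi; }
};

struct ToyPSHeap {
  ToySpan young, old_gen, perm;
  bool is_in_young(uintptr_t p) const { return young.contains(p); }
  bool is_in_old_or_perm(uintptr_t p) const {
    return old_gen.contains(p) || perm.contains(p);
  }
};

int main() {
  ToyPSHeap h = { {0x1000, 0x2000}, {0x2000, 0x4000}, {0x4000, 0x5000} };
  assert(h.is_in_young(0x1800));
  assert(h.is_in_old_or_perm(0x4800));
  assert(!h.is_in_young(0x2800));
  return 0;
}
```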
@@ -137,6 +137,89 @@ HeapWord* CollectedHeap::allocate_from_tlab_slow(Thread* thread, size_t size) {
   return obj;
 }

+void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
+  MemRegion deferred = thread->deferred_card_mark();
+  if (!deferred.is_empty()) {
+    {
+      // Verify that the storage points to a parsable object in heap
+      DEBUG_ONLY(oop old_obj = oop(deferred.start());)
+      assert(is_in(old_obj), "Not in allocated heap");
+      assert(!can_elide_initializing_store_barrier(old_obj),
+             "Else should have been filtered in defer_store_barrier()");
+      assert(!is_in_permanent(old_obj), "Sanity: not expected");
+      assert(old_obj->is_oop(true), "Not an oop");
+      assert(old_obj->is_parsable(), "Will not be concurrently parsable");
+      assert(deferred.word_size() == (size_t)(old_obj->size()),
+             "Mismatch: multiple objects?");
+    }
+    BarrierSet* bs = barrier_set();
+    assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
+    bs->write_region(deferred);
+    // "Clear" the deferred_card_mark field
+    thread->set_deferred_card_mark(MemRegion());
+  }
+  assert(thread->deferred_card_mark().is_empty(), "invariant");
+}
+
+// Helper for ReduceInitialCardMarks. For performance,
+// compiled code may elide card-marks for initializing stores
+// to a newly allocated object along the fast-path. We
+// compensate for such elided card-marks as follows:
+// (a) Generational, non-concurrent collectors, such as
+//     GenCollectedHeap(ParNew,DefNew,Tenured) and
+//     ParallelScavengeHeap(ParallelGC, ParallelOldGC),
+//     need the card-mark if and only if the region is
+//     in the old gen, and do not care whether the card-mark
+//     follows or precedes the initializing stores themselves,
+//     so long as the card-mark is completed before the next
+//     scavenge. For all these cases, we can do a card mark
+//     at the point at which we do a slow path allocation
+//     in the old gen. For uniformity, however, we end
+//     up using the same scheme (see below) for all three
+//     cases (deferring the card-mark appropriately).
+// (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires,
+//     in addition, that the card-mark for an old gen allocated
+//     object strictly follow any associated initializing stores.
+//     In these cases, the MemRegion remembered below is
+//     used to card-mark the entire region either just before the next
+//     slow-path allocation by this thread or just before the next scavenge or
+//     CMS-associated safepoint, whichever of these events happens first.
+//     (The implicit assumption is that the object has been fully
+//     initialized by this point, a fact that we assert when doing the
+//     card-mark.)
+// (c) G1CollectedHeap(G1) uses two kinds of write barriers. When
+//     G1 concurrent marking is in progress, an SATB (pre-write-)barrier
+//     is used to remember the pre-value of any store. Initializing
+//     stores will not need this barrier, so we need not worry about
+//     compensating for the missing pre-barrier here. Turning now
+//     to the post-barrier, we note that G1 needs an RS update barrier
+//     which simply enqueues a (sequence of) dirty cards, which may
+//     optionally be refined by the concurrent update threads. Note
+//     that this barrier need only be applied to a non-young write
+//     but, as in CMS, because of the presence of concurrent refinement
+//     (much like CMS' precleaning), must strictly follow the oop-store.
+//     Thus the protocol for maintaining the intended invariants turns
+//     out, serendipitously, to be the same for all three
+//     collectors/heap types above.
+//
+// For each future collector, this should be reexamined with
+// that specific collector in mind.
+oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) {
+  // If a previous card-mark was deferred, flush it now.
+  flush_deferred_store_barrier(thread);
+  if (can_elide_initializing_store_barrier(new_obj)) {
+    // The deferred_card_mark region should be empty
+    // following the flush above.
+    assert(thread->deferred_card_mark().is_empty(), "Error");
+  } else {
+    // Remember info for the newly deferred store barrier
+    MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size());
+    assert(!deferred.is_empty(), "Error");
+    thread->set_deferred_card_mark(deferred);
+  }
+  return new_obj;
+}
+
 size_t CollectedHeap::filler_array_hdr_size() {
   return size_t(arrayOopDesc::header_size(T_INT));
 }
@@ -225,16 +308,6 @@ void CollectedHeap::fill_with_objects(HeapWord* start, size_t words)
   fill_with_object_impl(start, words);
 }

-oop CollectedHeap::new_store_barrier(oop new_obj) {
-  // %%% This needs refactoring. (It was imported from the server compiler.)
-  guarantee(can_elide_tlab_store_barriers(), "store barrier elision not supported");
-  BarrierSet* bs = this->barrier_set();
-  assert(bs->has_write_region_opt(), "Barrier set does not have write_region");
-  int new_size = new_obj->size();
-  bs->write_region(MemRegion((HeapWord*)new_obj, new_size));
-  return new_obj;
-}
-
 HeapWord* CollectedHeap::allocate_new_tlab(size_t size) {
   guarantee(false, "thread-local allocation buffers not supported");
   return NULL;
......
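Case (b) above is the essence of bug 6888898: once a concurrent thread (CMS precleaning, or G1 refinement in case (c)) may scan dirty cards at any time, a card dirtied before the initializing stores can expose a half-initialized object. The toy simulation below replays the two orderings sequentially; `ToyCard` and `ToyObj` are illustrative, and no real concurrency is involved:

```cpp
// Sequential replay of the two card-mark orderings against a concurrent
// card scanner (toy model of CMS precleaning; no real threads involved).
#include <cassert>
#include <cstdint>

struct ToyCard { bool dirty; ToyCard() : dirty(false) {} };
struct ToyObj  { intptr_t field; ToyObj() : field(-1) {} };  // -1 == junk

// A precleaner scans any dirty card and reads the fields under it.
intptr_t preclean(const ToyCard& c, const ToyObj& o) {
  return c.dirty ? o.field : 0;                 // 0 == card not scanned
}

int main() {
  { // Unsafe ordering (bug 6888898): card dirtied eagerly, precleaner runs,
    // initializing store lands afterwards -- the scanner saw junk.
    ToyCard c; ToyObj o;
    c.dirty = true;                             // eager card mark
    intptr_t seen = preclean(c, o);             // concurrent precleaning
    o.field = 42;                               // initializing store
    assert(seen == -1);                         // half-initialized data seen
  }
  { // Safe ordering (this fix): stores first, deferred mark flushed after.
    ToyCard c; ToyObj o;
    o.field = 42;                               // initializing store
    c.dirty = true;                             // deferred card mark flushed
    assert(preclean(c, o) == 42);               // scanner sees a valid field
  }
  return 0;
}
```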
@@ -415,9 +415,14 @@ class CollectedHeap : public CHeapObj {
     guarantee(false, "thread-local allocation buffers not supported");
     return 0;
   }

   // Can a compiler initialize a new object without store barriers?
   // This permission only extends from the creation of a new object
-  // via a TLAB up to the first subsequent safepoint.
+  // via a TLAB up to the first subsequent safepoint. If such permission
+  // is granted for this heap type, the compiler promises to call
+  // defer_store_barrier() below on any slow path allocation of
+  // a new object for which such initializing store barriers will
+  // have been elided.
   virtual bool can_elide_tlab_store_barriers() const = 0;

   // If a compiler is eliding store barriers for TLAB-allocated objects,

@@ -426,7 +431,18 @@ class CollectedHeap : public CHeapObj {
   // promises to call this function on such a slow-path-allocated
   // object before performing initializations that have elided
   // store barriers. Returns new_obj, or maybe a safer copy thereof.
-  virtual oop new_store_barrier(oop new_obj);
+  virtual oop defer_store_barrier(JavaThread* thread, oop new_obj);
+
+  // Returns "true" if an initializing store to a new object at the
+  // given address doesn't need a (deferred) store barrier;
+  // returns "false" if it does.
+  virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0;
+
+  // If the CollectedHeap was asked to defer a store barrier above,
+  // this informs it to flush such a deferred store barrier to the
+  // remembered set.
+  virtual void flush_deferred_store_barrier(JavaThread* thread);

   // Can a compiler elide a store barrier when it writes
   // a permanent oop into the heap? Applies when the compiler
......
@@ -260,6 +260,17 @@ public:
     return true;
   }

+  // We don't need barriers for stores to objects in the
+  // young gen and, a fortiori, for initializing stores to
+  // objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS}
+  // only and may need to be re-examined in case other
+  // kinds of collectors are implemented in the future.
+  virtual bool can_elide_initializing_store_barrier(oop new_obj) {
+    assert(UseParNewGC || UseSerialGC || UseConcMarkSweepGC,
+           "Check can_elide_initializing_store_barrier() for this collector");
+    return is_in_youngest((void*)new_obj);
+  }
+
   // Can a compiler elide a store barrier when it writes
   // a permanent oop into the heap? Applies when the compiler
   // is storing x to the heap, where x->is_perm() is true.
......
@@ -3186,6 +3186,15 @@ void GraphKit::write_barrier_post(Node* oop_store,
     return;
   }

+  if (use_ReduceInitialCardMarks()
+      && obj == just_allocated_object(control())) {
+    // We can skip marks on a freshly-allocated object in Eden.
+    // Keep this code in sync with maybe_defer_card_mark() in runtime.cpp.
+    // That routine informs the GC, which takes appropriate compensating
+    // steps to make this card-mark elision safe.
+    return;
+  }
+
   if (!use_precise) {
     // All card marks for a (non-array) instance are in one place:
     adr = obj;
......
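The inserted test is a simple predicate: elide the post-barrier only when ReduceInitialCardMarks is on and the stored-to object is provably the one just allocated on this control path. A standalone toy rendering of that predicate (the `Node` type and `just_allocated_object` stand-in are placeholders, not C2's IR):

```cpp
// Toy rendering of the compile-time elision predicate in write_barrier_post().
#include <cassert>

struct Node { int id; };                        // stand-in for a C2 IR node

static bool reduce_initial_card_marks = true;   // stands in for the flag check

// Stand-in for GraphKit::just_allocated_object(): the node representing the
// object created by the most recent allocation on the current control path.
Node* just_allocated_object(Node* alloc_result) { return alloc_result; }

bool can_skip_post_barrier(Node* obj, Node* alloc_result) {
  // Elide only for the freshly allocated object; the runtime's
  // maybe_defer_card_mark() compensates on the slow path.
  return reduce_initial_card_marks && obj == just_allocated_object(alloc_result);
}

int main() {
  Node fresh = {1}, older = {2};
  assert(can_skip_post_barrier(&fresh, &fresh));   // initializing store: elide
  assert(!can_skip_post_barrier(&older, &fresh));  // store to other object: mark
  return 0;
}
```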
@@ -4160,13 +4160,13 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
       result_mem ->set_req(_objArray_path, reset_memory());
     }
   }

-  // We can dispense with card marks if we know the allocation
-  // comes out of eden (TLAB)... In fact, ReduceInitialCardMarks
-  // causes the non-eden paths to simulate a fresh allocation,
-  // insofar that no further card marks are required to initialize
-  // the object.
-
   // Otherwise, there are no card marks to worry about.
+  // (We can dispense with card marks if we know the allocation
+  // comes out of eden (TLAB)... In fact, ReduceInitialCardMarks
+  // causes the non-eden paths to take compensating steps to
+  // simulate a fresh allocation, so that no further
+  // card marks are required in compiled code to initialize
+  // the object.)

   if (!stopped()) {
     copy_to_clone(obj, alloc_obj, obj_size, true, false);
......
@@ -143,18 +143,20 @@ const char* OptoRuntime::stub_name(address entry) {

 // We failed the fast-path allocation. Now we need to do a scavenge or GC
 // and try allocation again.

-void OptoRuntime::do_eager_card_mark(JavaThread* thread) {
+void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) {
   // After any safepoint, just before going back to compiled code,
-  // we perform a card mark. This lets the compiled code omit
-  // card marks for initialization of new objects.
-  // Keep this code consistent with GraphKit::store_barrier.
+  // we inform the GC that we will be doing initializing writes to
+  // this object in the future without emitting card-marks, so
+  // the GC may take any compensating steps.
+  // NOTE: Keep this code consistent with GraphKit::store_barrier.

   oop new_obj = thread->vm_result();
   if (new_obj == NULL) return;

   assert(Universe::heap()->can_elide_tlab_store_barriers(),
          "compiler must check this first");
-  new_obj = Universe::heap()->new_store_barrier(new_obj);
+  // GC may decide to give back a safer copy of new_obj.
+  new_obj = Universe::heap()->defer_store_barrier(thread, new_obj);
   thread->set_vm_result(new_obj);
 }

@@ -197,8 +199,8 @@ JRT_BLOCK_ENTRY(void, OptoRuntime::new_instance_C(klassOopDesc* klass, JavaThrea
   JRT_BLOCK_END;

   if (GraphKit::use_ReduceInitialCardMarks()) {
-    // do them now so we don't have to do them on the fast path
-    do_eager_card_mark(thread);
+    // Inform the GC that we won't do card marks for initializing writes.
+    maybe_defer_card_mark(thread);
   }
 JRT_END

@@ -236,8 +238,8 @@ JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_C(klassOopDesc* array_type, int len
   JRT_BLOCK_END;

   if (GraphKit::use_ReduceInitialCardMarks()) {
-    // do them now so we don't have to do them on the fast path
-    do_eager_card_mark(thread);
+    // Inform the GC that we won't do card marks for initializing writes.
+    maybe_defer_card_mark(thread);
   }
 JRT_END
......
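Together, `maybe_defer_card_mark()` and `defer_store_barrier()` keep at most one deferral pending per thread: each new slow-path allocation first flushes the previous one. A toy standalone model of that call sequence (the address-based young/old split below is invented for illustration):

```cpp
// Toy model of the slow-path hook's call sequence.
#include <cassert>
#include <cstdint>
#include <vector>

static const uintptr_t young_limit = 0x1000;  // toy young/old boundary
static std::vector<uintptr_t> flushed;        // card-marks actually issued

struct ToyThread {
  uintptr_t vm_result;                        // oop returned to compiled code
  uintptr_t deferred;                         // pending deferral (0 == none)
  ToyThread() : vm_result(0), deferred(0) {}
};

// Mirrors maybe_defer_card_mark(): runs after a slow-path allocation, just
// before returning to compiled code that elides initializing card-marks.
void maybe_defer_card_mark(ToyThread* t) {
  uintptr_t new_obj = t->vm_result;
  if (new_obj == 0) return;                   // allocation failed
  if (t->deferred != 0) {                     // flush the previous deferral
    flushed.push_back(t->deferred);
    t->deferred = 0;
  }
  if (new_obj >= young_limit)                 // old gen: defer the card-mark
    t->deferred = new_obj;
}

int main() {
  ToyThread t;
  t.vm_result = 0x2000;                       // slow-path alloc in "old gen"
  maybe_defer_card_mark(&t);                  // deferred; nothing flushed yet
  assert(t.deferred == 0x2000 && flushed.empty());

  t.vm_result = 0x2400;                       // next slow-path allocation
  maybe_defer_card_mark(&t);                  // flushes the earlier deferral
  assert(flushed.size() == 1 && t.deferred == 0x2400);
  return 0;
}
```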
@@ -133,8 +133,8 @@ class OptoRuntime : public AllStatic {
   // Allocate storage for an objArray or typeArray
   static void new_array_C(klassOopDesc* array_klass, int len, JavaThread* thread);

-  // Post-allocation step for implementing ReduceInitialCardMarks:
-  static void do_eager_card_mark(JavaThread* thread);
+  // Post-slow-path-allocation step for implementing ReduceInitialCardMarks:
+  static void maybe_defer_card_mark(JavaThread* thread);

   // Allocate storage for multi-dimensional arrays
   // Note: needs to be fixed for arbitrary number of dimensions
......
@@ -1213,6 +1213,7 @@ JavaThread::JavaThread(bool is_attaching) :
 {
   initialize();
   _is_attaching = is_attaching;
+  assert(_deferred_card_mark.is_empty(), "Default MemRegion ctor");
 }

 bool JavaThread::reguard_stack(address cur_sp) {

@@ -2318,6 +2319,10 @@ void JavaThread::gc_prologue() {

 void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
+  // Flush deferred store-barriers, if any, associated with
+  // initializing stores done by this JavaThread in the current epoch.
+  Universe::heap()->flush_deferred_store_barrier(this);
+
   // The ThreadProfiler oops_do is done from FlatProfiler::oops_do
   // since there may be more than one thread using each ThreadProfiler.
......
@@ -687,6 +687,11 @@ class JavaThread: public Thread {
   oop _vm_result;    // Used to pass back an oop result into Java code, GC-preserved
   oop _vm_result_2;  // Used to pass back an oop result into Java code, GC-preserved

+  // See ReduceInitialCardMarks: this holds the precise space interval of
+  // the most recent slow path allocation for which compiled code has
+  // elided card-marks for performance along the fast-path.
+  MemRegion _deferred_card_mark;
+
   MonitorChunk* _monitor_chunks;  // Contains the off stack monitors
                                   // allocated during deoptimization
                                   // and by JNI_MonitorEnter/Exit

@@ -1082,6 +1087,9 @@ class JavaThread: public Thread {
   oop vm_result_2() const       { return _vm_result_2; }
   void set_vm_result_2(oop x)   { _vm_result_2 = x; }

+  MemRegion deferred_card_mark() const      { return _deferred_card_mark; }
+  void set_deferred_card_mark(MemRegion mr) { _deferred_card_mark = mr; }
+
   // Exception handling for compiled methods
   oop exception_oop() const     { return _exception_oop; }
   int exception_stack_size() const { return _exception_stack_size; }
......