diff --git a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp index bec5507ef76946645cf4d3d5a87f86e73338f645..3628875eb3a197d1102b064a36b650647f2416bd 100644 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @@ -34,10 +34,12 @@ ParScanThreadState::ParScanThreadState(Space* to_space_, Generation* old_gen_, int thread_num_, ObjToScanQueueSet* work_queue_set_, + GrowableArray** overflow_stack_set_, size_t desired_plab_sz_, ParallelTaskTerminator& term_) : _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_), _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false), + _overflow_stack(overflow_stack_set_[thread_num_]), _ageTable(false), // false ==> not the global age table, no perf data. _to_space_alloc_buffer(desired_plab_sz_), _to_space_closure(gen_, this), _old_gen_closure(gen_, this), @@ -57,11 +59,6 @@ ParScanThreadState::ParScanThreadState(Space* to_space_, _start = os::elapsedTime(); _old_gen_closure.set_generation(old_gen_); _old_gen_root_closure.set_generation(old_gen_); - if (UseCompressedOops) { - _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray(512, true); - } else { - _overflow_stack = NULL; - } } #ifdef _MSC_VER #pragma warning( pop ) @@ -155,7 +152,7 @@ void ParScanThreadState::trim_queues(int max_size) { } bool ParScanThreadState::take_from_overflow_stack() { - assert(UseCompressedOops, "Else should not call"); + assert(ParGCUseLocalOverflow, "Else should not call"); assert(young_gen()->overflow_list() == NULL, "Error"); ObjToScanQueue* queue = work_queue(); GrowableArray* of_stack = overflow_stack(); @@ -183,7 +180,7 @@ bool ParScanThreadState::take_from_overflow_stack() { } void ParScanThreadState::push_on_overflow_stack(oop p) { - assert(UseCompressedOops, "Else should not call"); + assert(ParGCUseLocalOverflow, "Else should not call"); overflow_stack()->push(p); assert(young_gen()->overflow_list() == NULL, "Error"); } @@ -260,6 +257,7 @@ public: ParNewGeneration& gen, Generation& old_gen, ObjToScanQueueSet& queue_set, + GrowableArray** overflow_stacks_, size_t desired_plab_sz, ParallelTaskTerminator& term); inline ParScanThreadState& thread_sate(int i); @@ -282,6 +280,7 @@ private: ParScanThreadStateSet::ParScanThreadStateSet( int num_threads, Space& to_space, ParNewGeneration& gen, Generation& old_gen, ObjToScanQueueSet& queue_set, + GrowableArray** overflow_stack_set_, size_t desired_plab_sz, ParallelTaskTerminator& term) : ResourceArray(sizeof(ParScanThreadState), num_threads), _gen(gen), _next_gen(old_gen), _term(term), @@ -292,7 +291,7 @@ ParScanThreadStateSet::ParScanThreadStateSet( for (int i = 0; i < num_threads; ++i) { new ((ParScanThreadState*)_data + i) ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set, - desired_plab_sz, term); + overflow_stack_set_, desired_plab_sz, term); } } @@ -519,6 +518,17 @@ ParNewGeneration(ReservedSpace rs, size_t initial_byte_size, int level) for (uint i2 = 0; i2 < ParallelGCThreads; i2++) _task_queues->queue(i2)->initialize(); + _overflow_stacks = NEW_C_HEAP_ARRAY(GrowableArray*, ParallelGCThreads); + guarantee(_overflow_stacks != NULL, "Overflow stack set allocation failure"); + for (uint i = 0; i < ParallelGCThreads; i++) { + if (ParGCUseLocalOverflow) { + _overflow_stacks[i] = new (ResourceObj::C_HEAP) GrowableArray(512, true); + guarantee(_overflow_stacks[i] != NULL, "Overflow Stack allocation failure."); + } else { + _overflow_stacks[i] = NULL; + } + } + if (UsePerfData) { EXCEPTION_MARK; ResourceMark rm; @@ -784,7 +794,7 @@ void ParNewGeneration::collect(bool full, ParallelTaskTerminator _term(workers->total_workers(), task_queues()); ParScanThreadStateSet thread_state_set(workers->total_workers(), *to(), *this, *_next_gen, *task_queues(), - desired_plab_sz(), _term); + _overflow_stacks, desired_plab_sz(), _term); ParNewGenTask tsk(this, _next_gen, reserved().end(), &thread_state_set); int n_workers = workers->total_workers(); @@ -1238,11 +1248,12 @@ bool ParNewGeneration::should_simulate_overflow() { #define BUSY (oop(0x1aff1aff)) void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) { assert(is_in_reserved(from_space_obj), "Should be from this generation"); - if (UseCompressedOops) { + if (ParGCUseLocalOverflow) { // In the case of compressed oops, we use a private, not-shared // overflow stack. par_scan_state->push_on_overflow_stack(from_space_obj); } else { + assert(!UseCompressedOops, "Error"); // if the object has been forwarded to itself, then we cannot // use the klass pointer for the linked list. Instead we have // to allocate an oopDesc in the C-Heap and use that for the linked list. @@ -1275,9 +1286,10 @@ void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadSt bool ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) { bool res; - if (UseCompressedOops) { + if (ParGCUseLocalOverflow) { res = par_scan_state->take_from_overflow_stack(); } else { + assert(!UseCompressedOops, "Error"); res = take_from_overflow_list_work(par_scan_state); } return res; @@ -1305,6 +1317,7 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan (size_t)ParGCDesiredObjsFromOverflowList); assert(par_scan_state->overflow_stack() == NULL, "Error"); + assert(!UseCompressedOops, "Error"); if (_overflow_list == NULL) return false; // Otherwise, there was something there; try claiming the list. diff --git a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp index 7f0015bd676cf00f74402c73a348ebaeb705dc30..3e2ab80af2edfeb78db77ef7912c56a8178836e4 100644 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp @@ -33,8 +33,8 @@ class ParEvacuateFollowersClosure; // but they must be here to allow ParScanClosure::do_oop_work to be defined // in genOopClosures.inline.hpp. -typedef OopTaskQueue ObjToScanQueue; -typedef OopTaskQueueSet ObjToScanQueueSet; +typedef OopTaskQueue ObjToScanQueue; +typedef OopTaskQueueSet ObjToScanQueueSet; // Enable this to get push/pop/steal stats. const int PAR_STATS_ENABLED = 0; @@ -116,7 +116,9 @@ class ParScanThreadState { ParScanThreadState(Space* to_space_, ParNewGeneration* gen_, Generation* old_gen_, int thread_num_, - ObjToScanQueueSet* work_queue_set_, size_t desired_plab_sz_, + ObjToScanQueueSet* work_queue_set_, + GrowableArray** overflow_stack_set_, + size_t desired_plab_sz_, ParallelTaskTerminator& term_); public: @@ -296,9 +298,12 @@ class ParNewGeneration: public DefNewGeneration { char pad[64 - sizeof(ObjToScanQueue)]; // prevent false sharing }; - // The per-thread work queues, available here for stealing. + // The per-worker-thread work queues ObjToScanQueueSet* _task_queues; + // Per-worker-thread local overflow stacks + GrowableArray** _overflow_stacks; + // Desired size of survivor space plab's PLABStats _plab_stats; diff --git a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp index 5dadeb34e714ba6ab2519847d6bcfcced29232ed..76577c3c569f20e2da45212ebe57726da06a9959 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp @@ -508,6 +508,7 @@ ParallelCompactData::summarize_split_space(size_t src_region, assert(destination <= target_end, "sanity"); assert(destination + _region_data[src_region].data_size() > target_end, "region should not fit into target space"); + assert(is_region_aligned(target_end), "sanity"); size_t split_region = src_region; HeapWord* split_destination = destination; @@ -538,14 +539,12 @@ ParallelCompactData::summarize_split_space(size_t src_region, // max(top, max(new_top, clear_top)) // // where clear_top is a new field in SpaceInfo. Would have to set clear_top - // to destination + partial_obj_size, where both have the values passed to - // this routine. + // to target_end. const RegionData* const sr = region(split_region); const size_t beg_idx = addr_to_region_idx(region_align_up(sr->destination() + sr->partial_obj_size())); - const size_t end_idx = - addr_to_region_idx(region_align_up(destination + partial_obj_size)); + const size_t end_idx = addr_to_region_idx(target_end); if (TraceParallelOldGCSummaryPhase) { gclog_or_tty->print_cr("split: clearing source_region field in [" diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp index 96ead39567f06a34d3f69cdc176175f477bf4c66..53ea39cd0829bde2d26ee0450b9b5e3e8d0a4a29 100644 --- a/src/share/vm/runtime/arguments.cpp +++ b/src/share/vm/runtime/arguments.cpp @@ -969,7 +969,7 @@ void Arguments::set_parnew_gc_flags() { } else { no_shared_spaces(); - // By default YoungPLABSize and OldPLABSize are set to 4096 and 1024 correspondinly, + // By default YoungPLABSize and OldPLABSize are set to 4096 and 1024 respectively, // these settings are default for Parallel Scavenger. For ParNew+Tenured configuration // we set them to 1024 and 1024. // See CR 6362902. @@ -985,6 +985,16 @@ void Arguments::set_parnew_gc_flags() { if (AlwaysTenure) { FLAG_SET_CMDLINE(intx, MaxTenuringThreshold, 0); } + // When using compressed oops, we use local overflow stacks, + // rather than using a global overflow list chained through + // the klass word of the object's pre-image. + if (UseCompressedOops && !ParGCUseLocalOverflow) { + if (!FLAG_IS_DEFAULT(ParGCUseLocalOverflow)) { + warning("Forcing +ParGCUseLocalOverflow: needed if using compressed references"); + } + FLAG_SET_DEFAULT(ParGCUseLocalOverflow, true); + } + assert(ParGCUseLocalOverflow || !UseCompressedOops, "Error"); } } diff --git a/src/share/vm/runtime/globals.hpp b/src/share/vm/runtime/globals.hpp index ec14dcf52bcd14d2b81b70ff01559de342e0795a..3bbf2bd6cf59e8d298d377ff93765a37dc156183 100644 --- a/src/share/vm/runtime/globals.hpp +++ b/src/share/vm/runtime/globals.hpp @@ -1323,8 +1323,11 @@ class CommandLineFlags { product(intx, ParGCArrayScanChunk, 50, \ "Scan a subset and push remainder, if array is bigger than this") \ \ + product(bool, ParGCUseLocalOverflow, false, \ + "Instead of a global overflow list, use local overflow stacks") \ + \ product(bool, ParGCTrimOverflow, true, \ - "Eagerly trim the overflow lists (useful for UseCompressedOops") \ + "Eagerly trim the local overflow lists (when ParGCUseLocalOverflow") \ \ notproduct(bool, ParGCWorkQueueOverflowALot, false, \ "Whether we should simulate work queue overflow in ParNew") \