Commit 00dc285e authored by asaha

Merge

Parents: c0747015 7459f9bf
@@ -626,3 +626,5 @@ d51ef6da82b486e7b2b3c08eef9ca0a186935ded hs25.60-b07
353e580ce6878d80c7b7cd27f8ad24609b12c58b jdk8u60-b07 353e580ce6878d80c7b7cd27f8ad24609b12c58b jdk8u60-b07
a72a4192a36d6d84766d6135fe6515346c742007 hs25.60-b08 a72a4192a36d6d84766d6135fe6515346c742007 hs25.60-b08
bf68e15dc8fe73eeb1eb3c656df51fdb1f707a97 jdk8u60-b08 bf68e15dc8fe73eeb1eb3c656df51fdb1f707a97 jdk8u60-b08
d937e6a0674841d670232ecf1611f52e1ae998e7 hs25.60-b09
f1058b5c6294235d8ad032dcc72c8f8bc202cb5a jdk8u60-b09
# #
# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
# #
# This code is free software; you can redistribute it and/or modify it # This code is free software; you can redistribute it and/or modify it
@@ -140,13 +140,7 @@ ADLCFLAGS += $(SYSDEFS)
# Note "+="; it is a hook so flags.make can add more flags, like -g or -DFOO. # Note "+="; it is a hook so flags.make can add more flags, like -g or -DFOO.
ADLCFLAGS += -q -T ADLCFLAGS += -q -T
# Normally, debugging is done directly on the ad_<arch>*.cpp files.
# But -g will put #line directives in those files pointing back to <arch>.ad.
# Some builds of gcc 3.2 have a bug that gets tickled by the extra #line directives
# so skip it for 3.2 and ealier.
ifneq "$(shell expr \( $(CC_VER_MAJOR) \> 3 \) \| \( \( $(CC_VER_MAJOR) = 3 \) \& \( $(CC_VER_MINOR) \>= 3 \) \))" "0"
ADLCFLAGS += -g ADLCFLAGS += -g
endif
ifdef LP64 ifdef LP64
ADLCFLAGS += -D_LP64 ADLCFLAGS += -D_LP64
......
# #
# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2004, 2015, Oracle and/or its affiliates. All rights reserved.
# Copyright 2012, 2013 SAP AG. All rights reserved. # Copyright 2012, 2015 SAP AG. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
# #
# This code is free software; you can redistribute it and/or modify it # This code is free software; you can redistribute it and/or modify it
@@ -46,7 +46,9 @@ CFLAGS += -qsuppress=1540-0198
# - 1540-1090 (I) The destructor of "..." might not be called. # - 1540-1090 (I) The destructor of "..." might not be called.
# - 1500-010: (W) WARNING in ...: Infinite loop. Program may not stop. # - 1500-010: (W) WARNING in ...: Infinite loop. Program may not stop.
# There are several infinite loops in the vm, suppress. # There are several infinite loops in the vm, suppress.
CFLAGS += -qsuppress=1540-1090 -qsuppress=1500-010 # - 1540-1639 (I) The behavior of long type bit fields has changed ...
# ... long type bit fields now default to long, not int.
CFLAGS += -qsuppress=1540-1090 -qsuppress=1500-010 -qsuppress=1540-1639
# Suppress # Suppress
# - 540-1088 (W) The exception specification is being ignored. # - 540-1088 (W) The exception specification is being ignored.
@@ -69,9 +71,6 @@ OPT_CFLAGS += -qstrict
OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT) OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT) OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT)
# xlc 10.01 parameters for ipa compile.
QIPA_COMPILE=$(if $(CXX_IS_V10),-qipa)
# Xlc 10.1 parameters for aggressive optimization: # Xlc 10.1 parameters for aggressive optimization:
# - qhot=level=1: Most aggressive loop optimizations. # - qhot=level=1: Most aggressive loop optimizations.
# - qignerrno: Assume errno is not modified by system calls. # - qignerrno: Assume errno is not modified by system calls.
@@ -86,7 +85,7 @@ QV10_OPT_CONSERVATIVE=$(if $(CXX_IS_V10),-qhot=level=1 -qignerrno -qinline)
OPT_CFLAGS/synchronizer.o = $(OPT_CFLAGS) -qnoinline OPT_CFLAGS/synchronizer.o = $(OPT_CFLAGS) -qnoinline
# Set all the xlC V10.1 options here. # Set all the xlC V10.1 options here.
OPT_CFLAGS += $(QIPA_COMPILE) $(QV10_OPT) $(QV10_OPT_AGGRESSIVE) OPT_CFLAGS += $(QV10_OPT) $(QV10_OPT_AGGRESSIVE)
export OBJECT_MODE=64 export OBJECT_MODE=64
......
# #
# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012, 2013 SAP. All rights reserved. # Copyright (c) 2012, 2015 SAP. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
# #
# This code is free software; you can redistribute it and/or modify it # This code is free software; you can redistribute it and/or modify it
@@ -34,13 +34,17 @@ HOSTCC = $(CC)
AS = $(CC) -c AS = $(CC) -c
# get xlc version # get xlc version which comes as VV.RR.MMMM.LLLL where 'VV' is the version,
CXX_VERSION := $(shell $(CXX) -qversion 2>&1 | sed -n 's/.*Version: \([0-9.]*\)/\1/p') # 'RR' is the release, 'MMMM' is the modification and 'LLLL' is the level.
# We only use 'VV.RR.LLLL' to avoid integer overflows in bash when comparing
# the version numbers (some shells only support 32-bit integer compares!).
CXX_VERSION := $(shell $(CXX) -qversion 2>&1 | \
sed -n 's/.*Version: \([0-9]\{2\}\).\([0-9]\{2\}\).[0-9]\{4\}.\([0-9]\{4\}\)/\1\2\3/p')
# xlc 08.00.0000.0023 and higher supports -qtune=balanced # xlc 08.00.0000.0023 and higher supports -qtune=balanced
CXX_SUPPORTS_BALANCED_TUNING=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 080000000023 ] ; then echo "true" ; fi) CXX_SUPPORTS_BALANCED_TUNING := $(shell if [ $(CXX_VERSION) -ge 08000023 ] ; then echo "true" ; fi)
# xlc 10.01 is used with aggressive optimizations to boost performance # xlc 10.01 is used with aggressive optimizations to boost performance
CXX_IS_V10=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 100100000000 ] ; then echo "true" ; fi) CXX_IS_V10 := $(shell if [ $(CXX_VERSION) -ge 10010000 ] ; then echo "true" ; fi)
# check for precompiled headers support # check for precompiled headers support
@@ -130,7 +134,7 @@ STATIC_STDCXX = -Wl,-lC_r
# MAPFLAG = -Xlinker --version-script=FILENAME # MAPFLAG = -Xlinker --version-script=FILENAME
# Build shared library # Build shared library
SHARED_FLAG = -q64 -b64 -bexpall -G -bnoentry -qmkshrobj -brtl -bnolibpath SHARED_FLAG = -q64 -b64 -bexpall -G -bnoentry -qmkshrobj -brtl -bnolibpath -bernotok
#------------------------------------------------------------------------ #------------------------------------------------------------------------
# Debug flags # Debug flags
......
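Note on the CXX_VERSION change above: the sed expression condenses xlc's "VV.RR.MMMM.LLLL" version string to the eight-digit number VVRRLLLL before it is compared with -ge, so shells that only support 32-bit integer comparison can still evaluate it; the old dot-stripped form had twelve digits and overflowed. Below is a minimal standalone C++ sketch of the same condensing idea (the helper name is hypothetical, not part of the patch):

// Illustrative sketch only: mimic what the new CXX_VERSION sed expression does,
// reducing "VV.RR.MMMM.LLLL" to the number VVRRLLLL so comparisons stay well
// inside 32 bits.
#include <cstdio>

// Returns VV*1000000 + RR*10000 + LLLL, or -1 if the string does not match.
static long condensed_xlc_version(const char* v) {
  int vv, rr, mmmm, llll;
  if (std::sscanf(v, "%2d.%2d.%4d.%4d", &vv, &rr, &mmmm, &llll) != 4) return -1;
  (void)mmmm;  // the modification part is intentionally dropped
  return vv * 1000000L + rr * 10000L + llll;
}

int main() {
  // xlc 08.00.0000.0023 is the first level that supports -qtune=balanced.
  long v = condensed_xlc_version("08.00.0000.0023");
  std::printf("%ld >= 8000023 ? %d\n", v, v >= 8000023);  // prints: 8000023 >= 8000023 ? 1
  // The old dot-stripped form, 080000000023, does not fit in a 32-bit integer,
  // which is what broke the comparison in some shells.
  return 0;
}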
@@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2015
HS_MAJOR_VER=25 HS_MAJOR_VER=25
HS_MINOR_VER=60 HS_MINOR_VER=60
HS_BUILD_NUMBER=08 HS_BUILD_NUMBER=09
JDK_MAJOR_VER=1 JDK_MAJOR_VER=1
JDK_MINOR_VER=8 JDK_MINOR_VER=8
......
@@ -655,6 +655,7 @@ void ClassVerifier::verify_method(methodHandle m, TRAPS) {
bool this_uninit = false; // Set to true when invokespecial <init> initialized 'this' bool this_uninit = false; // Set to true when invokespecial <init> initialized 'this'
bool verified_exc_handlers = false;
// Merge with the next instruction // Merge with the next instruction
{ {
@@ -686,6 +687,18 @@ void ClassVerifier::verify_method(methodHandle m, TRAPS) {
} }
} }
// Look for possible jump target in exception handlers and see if it
// matches current_frame. Do this check here for astore*, dstore*,
// fstore*, istore*, and lstore* opcodes because they can change the type
// state by adding a local. JVM Spec says that the incoming type state
// should be used for this check. So, do the check here before a possible
// local is added to the type state.
if (Bytecodes::is_store_into_local(opcode) && bci >= ex_min && bci < ex_max) {
verify_exception_handler_targets(
bci, this_uninit, &current_frame, &stackmap_table, CHECK_VERIFY(this));
verified_exc_handlers = true;
}
switch (opcode) { switch (opcode) {
case Bytecodes::_nop : case Bytecodes::_nop :
no_control_flow = false; break; no_control_flow = false; break;
@@ -1662,9 +1675,13 @@ void ClassVerifier::verify_method(methodHandle m, TRAPS) {
} // end switch } // end switch
} // end Merge with the next instruction } // end Merge with the next instruction
// Look for possible jump target in exception handlers and see if it // Look for possible jump target in exception handlers and see if it matches
// matches current_frame // current_frame. Don't do this check if it has already been done (for
if (bci >= ex_min && bci < ex_max) { // ([a,d,f,i,l]store* opcodes). This check cannot be done earlier because
// opcodes, such as invokespecial, may set the this_uninit flag.
assert(!(verified_exc_handlers && this_uninit),
"Exception handler targets got verified before this_uninit got set");
if (!verified_exc_handlers && bci >= ex_min && bci < ex_max) {
verify_exception_handler_targets( verify_exception_handler_targets(
bci, this_uninit, &current_frame, &stackmap_table, CHECK_VERIFY(this)); bci, this_uninit, &current_frame, &stackmap_table, CHECK_VERIFY(this));
} }
@@ -2232,14 +2249,20 @@ void ClassVerifier::verify_field_instructions(RawBytecodeStream* bcs,
} }
// Look at the method's handlers. If the bci is in the handler's try block // Look at the method's handlers. If the bci is in the handler's try block
// then check if the handler_pc is already on the stack. If not, push it. // then check if the handler_pc is already on the stack. If not, push it
// unless the handler has already been scanned.
void ClassVerifier::push_handlers(ExceptionTable* exhandlers, void ClassVerifier::push_handlers(ExceptionTable* exhandlers,
GrowableArray<u4>* handler_list,
GrowableArray<u4>* handler_stack, GrowableArray<u4>* handler_stack,
u4 bci) { u4 bci) {
int exlength = exhandlers->length(); int exlength = exhandlers->length();
for(int x = 0; x < exlength; x++) { for(int x = 0; x < exlength; x++) {
if (bci >= exhandlers->start_pc(x) && bci < exhandlers->end_pc(x)) { if (bci >= exhandlers->start_pc(x) && bci < exhandlers->end_pc(x)) {
handler_stack->append_if_missing(exhandlers->handler_pc(x)); u4 exhandler_pc = exhandlers->handler_pc(x);
if (!handler_list->contains(exhandler_pc)) {
handler_stack->append_if_missing(exhandler_pc);
handler_list->append(exhandler_pc);
}
} }
} }
} }
@@ -2257,6 +2280,10 @@ bool ClassVerifier::ends_in_athrow(u4 start_bc_offset) {
GrowableArray<u4>* bci_stack = new GrowableArray<u4>(30); GrowableArray<u4>* bci_stack = new GrowableArray<u4>(30);
// Create stack for handlers for try blocks containing this handler. // Create stack for handlers for try blocks containing this handler.
GrowableArray<u4>* handler_stack = new GrowableArray<u4>(30); GrowableArray<u4>* handler_stack = new GrowableArray<u4>(30);
// Create list of handlers that have been pushed onto the handler_stack
// so that handlers embedded inside of their own TRY blocks only get
// scanned once.
GrowableArray<u4>* handler_list = new GrowableArray<u4>(30);
// Create list of visited branch opcodes (goto* and if*). // Create list of visited branch opcodes (goto* and if*).
GrowableArray<u4>* visited_branches = new GrowableArray<u4>(30); GrowableArray<u4>* visited_branches = new GrowableArray<u4>(30);
ExceptionTable exhandlers(_method()); ExceptionTable exhandlers(_method());
@@ -2275,7 +2302,7 @@ bool ClassVerifier::ends_in_athrow(u4 start_bc_offset) {
// If the bytecode is in a TRY block, push its handlers so they // If the bytecode is in a TRY block, push its handlers so they
// will get parsed. // will get parsed.
push_handlers(&exhandlers, handler_stack, bci); push_handlers(&exhandlers, handler_list, handler_stack, bci);
switch (opcode) { switch (opcode) {
case Bytecodes::_if_icmpeq: case Bytecodes::_if_icmpeq:
......
@@ -305,9 +305,10 @@ class ClassVerifier : public StackObj {
bool* this_uninit, constantPoolHandle cp, StackMapTable* stackmap_table, bool* this_uninit, constantPoolHandle cp, StackMapTable* stackmap_table,
TRAPS); TRAPS);
// Used by ends_in_athrow() to push all handlers that contain bci onto // Used by ends_in_athrow() to push all handlers that contain bci onto the
// the handler_stack, if the handler is not already on the stack. // handler_stack, if the handler has not already been pushed on the stack.
void push_handlers(ExceptionTable* exhandlers, void push_handlers(ExceptionTable* exhandlers,
GrowableArray<u4>* handler_list,
GrowableArray<u4>* handler_stack, GrowableArray<u4>* handler_stack,
u4 bci); u4 bci);
......
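Note on the ClassVerifier change above: ends_in_athrow() walks bytecodes from a handler and pushes the handlers of any enclosing try block for later scanning; without extra bookkeeping, a handler that lies inside its own try range could be pushed again each time the walk re-enters that range. The simplified sketch below (std::vector standing in for HotSpot's GrowableArray) shows the scan-once pattern that the new handler_list parameter introduces:

// Simplified sketch of the bookkeeping added to push_handlers(): a handler pc
// is queued for scanning at most once, so a handler protected by its own try
// block cannot be re-queued endlessly.
#include <algorithm>
#include <cstdint>
#include <vector>

struct ExEntry { uint32_t start_pc, end_pc, handler_pc; };

static void push_handlers(const std::vector<ExEntry>& exhandlers,
                          std::vector<uint32_t>& handler_list,   // ever pushed
                          std::vector<uint32_t>& handler_stack,  // still to scan
                          uint32_t bci) {
  for (const ExEntry& e : exhandlers) {
    if (bci >= e.start_pc && bci < e.end_pc) {
      bool seen = std::find(handler_list.begin(), handler_list.end(),
                            e.handler_pc) != handler_list.end();
      if (!seen) {
        handler_stack.push_back(e.handler_pc);
        handler_list.push_back(e.handler_pc);
      }
    }
  }
}

int main() {
  // Handler at pc 10 protected by its own try range [5, 20): the first call
  // queues it, a later call from inside the handler does not queue it again.
  std::vector<ExEntry> table = { {5, 20, 10} };
  std::vector<uint32_t> list, stack;
  push_handlers(table, list, stack, 7);   // queues 10
  push_handlers(table, list, stack, 12);  // bci inside the handler: no re-queue
  return stack.size() == 1 ? 0 : 1;
}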
@@ -598,7 +598,7 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
_collector_policy(cp), _collector_policy(cp),
_should_unload_classes(CMSClassUnloadingEnabled), _should_unload_classes(CMSClassUnloadingEnabled),
_concurrent_cycles_since_last_unload(0), _concurrent_cycles_since_last_unload(0),
_roots_scanning_options(SharedHeap::SO_None), _roots_scanning_options(GenCollectedHeap::SO_None),
_inter_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding), _inter_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
_intra_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding), _intra_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
_gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) CMSTracer()), _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) CMSTracer()),
@@ -3068,7 +3068,7 @@ void CMSCollector::verify_after_remark_work_1() {
gch->gen_process_roots(_cmsGen->level(), gch->gen_process_roots(_cmsGen->level(),
true, // younger gens are roots true, // younger gens are roots
true, // activate StrongRootsScope true, // activate StrongRootsScope
SharedHeap::ScanningOption(roots_scanning_options()), GenCollectedHeap::ScanningOption(roots_scanning_options()),
should_unload_classes(), should_unload_classes(),
&notOlder, &notOlder,
NULL, NULL,
@@ -3136,7 +3136,7 @@ void CMSCollector::verify_after_remark_work_2() {
gch->gen_process_roots(_cmsGen->level(), gch->gen_process_roots(_cmsGen->level(),
true, // younger gens are roots true, // younger gens are roots
true, // activate StrongRootsScope true, // activate StrongRootsScope
SharedHeap::ScanningOption(roots_scanning_options()), GenCollectedHeap::ScanningOption(roots_scanning_options()),
should_unload_classes(), should_unload_classes(),
&notOlder, &notOlder,
NULL, NULL,
@@ -3327,7 +3327,7 @@ bool ConcurrentMarkSweepGeneration::is_too_full() const {
void CMSCollector::setup_cms_unloading_and_verification_state() { void CMSCollector::setup_cms_unloading_and_verification_state() {
const bool should_verify = VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC const bool should_verify = VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
|| VerifyBeforeExit; || VerifyBeforeExit;
const int rso = SharedHeap::SO_AllCodeCache; const int rso = GenCollectedHeap::SO_AllCodeCache;
// We set the proper root for this CMS cycle here. // We set the proper root for this CMS cycle here.
if (should_unload_classes()) { // Should unload classes this cycle if (should_unload_classes()) { // Should unload classes this cycle
@@ -3753,7 +3753,7 @@ void CMSCollector::checkpointRootsInitialWork(bool asynch) {
gch->gen_process_roots(_cmsGen->level(), gch->gen_process_roots(_cmsGen->level(),
true, // younger gens are roots true, // younger gens are roots
true, // activate StrongRootsScope true, // activate StrongRootsScope
SharedHeap::ScanningOption(roots_scanning_options()), GenCollectedHeap::ScanningOption(roots_scanning_options()),
should_unload_classes(), should_unload_classes(),
&notOlder, &notOlder,
NULL, NULL,
@@ -5254,13 +5254,13 @@ void CMSParInitialMarkTask::work(uint worker_id) {
gch->gen_process_roots(_collector->_cmsGen->level(), gch->gen_process_roots(_collector->_cmsGen->level(),
false, // yg was scanned above false, // yg was scanned above
false, // this is parallel code false, // this is parallel code
SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()), GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
_collector->should_unload_classes(), _collector->should_unload_classes(),
&par_mri_cl, &par_mri_cl,
NULL, NULL,
&cld_closure); &cld_closure);
assert(_collector->should_unload_classes() assert(_collector->should_unload_classes()
|| (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache), || (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
"if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops"); "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
_timer.stop(); _timer.stop();
if (PrintCMSStatistics != 0) { if (PrintCMSStatistics != 0) {
@@ -5390,14 +5390,14 @@ void CMSParRemarkTask::work(uint worker_id) {
gch->gen_process_roots(_collector->_cmsGen->level(), gch->gen_process_roots(_collector->_cmsGen->level(),
false, // yg was scanned above false, // yg was scanned above
false, // this is parallel code false, // this is parallel code
SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()), GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
_collector->should_unload_classes(), _collector->should_unload_classes(),
&par_mrias_cl, &par_mrias_cl,
NULL, NULL,
NULL); // The dirty klasses will be handled below NULL); // The dirty klasses will be handled below
assert(_collector->should_unload_classes() assert(_collector->should_unload_classes()
|| (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache), || (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
"if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops"); "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
_timer.stop(); _timer.stop();
if (PrintCMSStatistics != 0) { if (PrintCMSStatistics != 0) {
@@ -5982,14 +5982,14 @@ void CMSCollector::do_remark_non_parallel() {
gch->gen_process_roots(_cmsGen->level(), gch->gen_process_roots(_cmsGen->level(),
true, // younger gens as roots true, // younger gens as roots
false, // use the local StrongRootsScope false, // use the local StrongRootsScope
SharedHeap::ScanningOption(roots_scanning_options()), GenCollectedHeap::ScanningOption(roots_scanning_options()),
should_unload_classes(), should_unload_classes(),
&mrias_cl, &mrias_cl,
NULL, NULL,
NULL); // The dirty klasses will be handled below NULL); // The dirty klasses will be handled below
assert(should_unload_classes() assert(should_unload_classes()
|| (roots_scanning_options() & SharedHeap::SO_AllCodeCache), || (roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
"if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops"); "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
} }
......
@@ -254,25 +254,23 @@ void MutatorAllocRegion::retire_region(HeapRegion* alloc_region,
HeapRegion* SurvivorGCAllocRegion::allocate_new_region(size_t word_size, HeapRegion* SurvivorGCAllocRegion::allocate_new_region(size_t word_size,
bool force) { bool force) {
assert(!force, "not supported for GC alloc regions"); assert(!force, "not supported for GC alloc regions");
return _g1h->new_gc_alloc_region(word_size, count(), GCAllocForSurvived); return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Young);
} }
void SurvivorGCAllocRegion::retire_region(HeapRegion* alloc_region, void SurvivorGCAllocRegion::retire_region(HeapRegion* alloc_region,
size_t allocated_bytes) { size_t allocated_bytes) {
_g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, InCSetState::Young);
GCAllocForSurvived);
} }
HeapRegion* OldGCAllocRegion::allocate_new_region(size_t word_size, HeapRegion* OldGCAllocRegion::allocate_new_region(size_t word_size,
bool force) { bool force) {
assert(!force, "not supported for GC alloc regions"); assert(!force, "not supported for GC alloc regions");
return _g1h->new_gc_alloc_region(word_size, count(), GCAllocForTenured); return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Old);
} }
void OldGCAllocRegion::retire_region(HeapRegion* alloc_region, void OldGCAllocRegion::retire_region(HeapRegion* alloc_region,
size_t allocated_bytes) { size_t allocated_bytes) {
_g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, InCSetState::Old);
GCAllocForTenured);
} }
HeapRegion* OldGCAllocRegion::release() { HeapRegion* OldGCAllocRegion::release() {
......
@@ -59,7 +59,7 @@ void G1Allocator::reuse_retained_old_region(EvacuationInfo& evacuation_info,
!(retained_region->top() == retained_region->end()) && !(retained_region->top() == retained_region->end()) &&
!retained_region->is_empty() && !retained_region->is_empty() &&
!retained_region->isHumongous()) { !retained_region->isHumongous()) {
retained_region->record_top_and_timestamp(); retained_region->record_timestamp();
// The retained region was added to the old region set when it was // The retained region was added to the old region set when it was
// retired. We have to remove it now, since we don't allow regions // retired. We have to remove it now, since we don't allow regions
// we allocate to in the region sets. We'll re-add it later, when // we allocate to in the region sets. We'll re-add it later, when
@@ -94,6 +94,9 @@ void G1DefaultAllocator::release_gc_alloc_regions(uint no_of_gc_workers, Evacuat
// want either way so no reason to check explicitly for either // want either way so no reason to check explicitly for either
// condition. // condition.
_retained_old_gc_alloc_region = old_gc_alloc_region(context)->release(); _retained_old_gc_alloc_region = old_gc_alloc_region(context)->release();
if (_retained_old_gc_alloc_region != NULL) {
_retained_old_gc_alloc_region->record_retained_region();
}
if (ResizePLAB) { if (ResizePLAB) {
_g1h->_survivor_plab_stats.adjust_desired_plab_sz(no_of_gc_workers); _g1h->_survivor_plab_stats.adjust_desired_plab_sz(no_of_gc_workers);
@@ -110,15 +113,16 @@ void G1DefaultAllocator::abandon_gc_alloc_regions() {
G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) : G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
ParGCAllocBuffer(gclab_word_size), _retired(true) { } ParGCAllocBuffer(gclab_word_size), _retired(true) { }
HeapWord* G1ParGCAllocator::allocate_slow(GCAllocPurpose purpose, size_t word_sz, AllocationContext_t context) { HeapWord* G1ParGCAllocator::allocate_direct_or_new_plab(InCSetState dest,
HeapWord* obj = NULL; size_t word_sz,
size_t gclab_word_size = _g1h->desired_plab_sz(purpose); AllocationContext_t context) {
size_t gclab_word_size = _g1h->desired_plab_sz(dest);
if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) { if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) {
G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose, context); G1ParGCAllocBuffer* alloc_buf = alloc_buffer(dest, context);
add_to_alloc_buffer_waste(alloc_buf->words_remaining()); add_to_alloc_buffer_waste(alloc_buf->words_remaining());
alloc_buf->retire(false /* end_of_gc */, false /* retain */); alloc_buf->retire(false /* end_of_gc */, false /* retain */);
HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size, context); HeapWord* buf = _g1h->par_allocate_during_gc(dest, gclab_word_size, context);
if (buf == NULL) { if (buf == NULL) {
return NULL; // Let caller handle allocation failure. return NULL; // Let caller handle allocation failure.
} }
@@ -126,30 +130,33 @@ HeapWord* G1ParGCAllocator::allocate_slow(GCAllocPurpose purpose, size_t word_sz
alloc_buf->set_word_size(gclab_word_size); alloc_buf->set_word_size(gclab_word_size);
alloc_buf->set_buf(buf); alloc_buf->set_buf(buf);
obj = alloc_buf->allocate(word_sz); HeapWord* const obj = alloc_buf->allocate(word_sz);
assert(obj != NULL, "buffer was definitely big enough..."); assert(obj != NULL, "buffer was definitely big enough...");
return obj;
} else { } else {
obj = _g1h->par_allocate_during_gc(purpose, word_sz, context); return _g1h->par_allocate_during_gc(dest, word_sz, context);
} }
return obj;
} }
G1DefaultParGCAllocator::G1DefaultParGCAllocator(G1CollectedHeap* g1h) : G1DefaultParGCAllocator::G1DefaultParGCAllocator(G1CollectedHeap* g1h) :
G1ParGCAllocator(g1h), G1ParGCAllocator(g1h),
_surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)), _surviving_alloc_buffer(g1h->desired_plab_sz(InCSetState::Young)),
_tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)) { _tenured_alloc_buffer(g1h->desired_plab_sz(InCSetState::Old)) {
for (uint state = 0; state < InCSetState::Num; state++) {
_alloc_buffers[GCAllocForSurvived] = &_surviving_alloc_buffer; _alloc_buffers[state] = NULL;
_alloc_buffers[GCAllocForTenured] = &_tenured_alloc_buffer; }
_alloc_buffers[InCSetState::Young] = &_surviving_alloc_buffer;
_alloc_buffers[InCSetState::Old] = &_tenured_alloc_buffer;
} }
void G1DefaultParGCAllocator::retire_alloc_buffers() { void G1DefaultParGCAllocator::retire_alloc_buffers() {
for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { for (uint state = 0; state < InCSetState::Num; state++) {
size_t waste = _alloc_buffers[ap]->words_remaining(); G1ParGCAllocBuffer* const buf = _alloc_buffers[state];
add_to_alloc_buffer_waste(waste); if (buf != NULL) {
_alloc_buffers[ap]->flush_stats_and_retire(_g1h->stats_for_purpose((GCAllocPurpose)ap), add_to_alloc_buffer_waste(buf->words_remaining());
true /* end_of_gc */, buf->flush_stats_and_retire(_g1h->alloc_buffer_stats(state),
false /* retain */); true /* end_of_gc */,
false /* retain */);
}
} }
} }
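Note on allocate_direct_or_new_plab() above: the guard word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct only allows retiring and refilling the PLAB when the failed allocation is small relative to the buffer, so the space thrown away with the half-used PLAB stays inside the waste budget; larger requests are allocated directly in the region. A small numeric sketch (the PLAB size and waste percentage are assumed values for illustration, not taken from this patch):

// Back-of-the-envelope version of the PLAB refill test.
#include <cstddef>
#include <cstdio>

static bool may_refill_plab(size_t word_sz, size_t plab_word_size,
                            unsigned waste_pct) {
  return word_sz * 100 < plab_word_size * waste_pct;
}

int main() {
  const size_t plab = 4096;  // assumed desired PLAB size in words
  const unsigned pct = 10;   // assumed ParallelGCBufferWastePct
  std::printf("%d\n", may_refill_plab(100, plab, pct));  // 1: retire and refill the PLAB
  std::printf("%d\n", may_refill_plab(800, plab, pct));  // 0: allocate directly in the region
  return 0;
}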
@@ -27,14 +27,9 @@
#include "gc_implementation/g1/g1AllocationContext.hpp" #include "gc_implementation/g1/g1AllocationContext.hpp"
#include "gc_implementation/g1/g1AllocRegion.hpp" #include "gc_implementation/g1/g1AllocRegion.hpp"
#include "gc_implementation/g1/g1InCSetState.hpp"
#include "gc_implementation/shared/parGCAllocBuffer.hpp" #include "gc_implementation/shared/parGCAllocBuffer.hpp"
enum GCAllocPurpose {
GCAllocForTenured,
GCAllocForSurvived,
GCAllocPurposeCount
};
// Base class for G1 allocators. // Base class for G1 allocators.
class G1Allocator : public CHeapObj<mtGC> { class G1Allocator : public CHeapObj<mtGC> {
friend class VMStructs; friend class VMStructs;
@@ -178,20 +173,40 @@ class G1ParGCAllocator : public CHeapObj<mtGC> {
protected: protected:
G1CollectedHeap* _g1h; G1CollectedHeap* _g1h;
// The survivor alignment in effect in bytes.
// == 0 : don't align survivors
// != 0 : align survivors to that alignment
// These values were chosen to favor the non-alignment case since some
// architectures have a special compare against zero instructions.
const uint _survivor_alignment_bytes;
size_t _alloc_buffer_waste; size_t _alloc_buffer_waste;
size_t _undo_waste; size_t _undo_waste;
void add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; } void add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
void add_to_undo_waste(size_t waste) { _undo_waste += waste; } void add_to_undo_waste(size_t waste) { _undo_waste += waste; }
HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz, AllocationContext_t context);
virtual void retire_alloc_buffers() = 0; virtual void retire_alloc_buffers() = 0;
virtual G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose, AllocationContext_t context) = 0; virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context) = 0;
// Calculate the survivor space object alignment in bytes. Returns that or 0 if
// there are no restrictions on survivor alignment.
static uint calc_survivor_alignment_bytes() {
assert(SurvivorAlignmentInBytes >= ObjectAlignmentInBytes, "sanity");
if (SurvivorAlignmentInBytes == ObjectAlignmentInBytes) {
// No need to align objects in the survivors differently, return 0
// which means "survivor alignment is not used".
return 0;
} else {
assert(SurvivorAlignmentInBytes > 0, "sanity");
return SurvivorAlignmentInBytes;
}
}
public: public:
G1ParGCAllocator(G1CollectedHeap* g1h) : G1ParGCAllocator(G1CollectedHeap* g1h) :
_g1h(g1h), _alloc_buffer_waste(0), _undo_waste(0) { _g1h(g1h), _survivor_alignment_bytes(calc_survivor_alignment_bytes()),
_alloc_buffer_waste(0), _undo_waste(0) {
} }
static G1ParGCAllocator* create_allocator(G1CollectedHeap* g1h); static G1ParGCAllocator* create_allocator(G1CollectedHeap* g1h);
@@ -199,24 +214,40 @@ public:
size_t alloc_buffer_waste() { return _alloc_buffer_waste; } size_t alloc_buffer_waste() { return _alloc_buffer_waste; }
size_t undo_waste() {return _undo_waste; } size_t undo_waste() {return _undo_waste; }
HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz, AllocationContext_t context) { // Allocate word_sz words in dest, either directly into the regions or by
HeapWord* obj = NULL; // allocating a new PLAB. Returns the address of the allocated memory, NULL if
if (purpose == GCAllocForSurvived) { // not successful.
obj = alloc_buffer(purpose, context)->allocate_aligned(word_sz, SurvivorAlignmentInBytes); HeapWord* allocate_direct_or_new_plab(InCSetState dest,
size_t word_sz,
AllocationContext_t context);
// Allocate word_sz words in the PLAB of dest. Returns the address of the
// allocated memory, NULL if not successful.
HeapWord* plab_allocate(InCSetState dest,
size_t word_sz,
AllocationContext_t context) {
G1ParGCAllocBuffer* buffer = alloc_buffer(dest, context);
if (_survivor_alignment_bytes == 0) {
return buffer->allocate(word_sz);
} else { } else {
obj = alloc_buffer(purpose, context)->allocate(word_sz); return buffer->allocate_aligned(word_sz, _survivor_alignment_bytes);
} }
}
HeapWord* allocate(InCSetState dest, size_t word_sz,
AllocationContext_t context) {
HeapWord* const obj = plab_allocate(dest, word_sz, context);
if (obj != NULL) { if (obj != NULL) {
return obj; return obj;
} }
return allocate_slow(purpose, word_sz, context); return allocate_direct_or_new_plab(dest, word_sz, context);
} }
void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz, AllocationContext_t context) { void undo_allocation(InCSetState dest, HeapWord* obj, size_t word_sz, AllocationContext_t context) {
if (alloc_buffer(purpose, context)->contains(obj)) { if (alloc_buffer(dest, context)->contains(obj)) {
assert(alloc_buffer(purpose, context)->contains(obj + word_sz - 1), assert(alloc_buffer(dest, context)->contains(obj + word_sz - 1),
"should contain whole object"); "should contain whole object");
alloc_buffer(purpose, context)->undo_allocation(obj, word_sz); alloc_buffer(dest, context)->undo_allocation(obj, word_sz);
} else { } else {
CollectedHeap::fill_with_object(obj, word_sz); CollectedHeap::fill_with_object(obj, word_sz);
add_to_undo_waste(word_sz); add_to_undo_waste(word_sz);
@@ -227,13 +258,17 @@ public:
class G1DefaultParGCAllocator : public G1ParGCAllocator { class G1DefaultParGCAllocator : public G1ParGCAllocator {
G1ParGCAllocBuffer _surviving_alloc_buffer; G1ParGCAllocBuffer _surviving_alloc_buffer;
G1ParGCAllocBuffer _tenured_alloc_buffer; G1ParGCAllocBuffer _tenured_alloc_buffer;
G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount]; G1ParGCAllocBuffer* _alloc_buffers[InCSetState::Num];
public: public:
G1DefaultParGCAllocator(G1CollectedHeap* g1h); G1DefaultParGCAllocator(G1CollectedHeap* g1h);
virtual G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose, AllocationContext_t context) { virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context) {
return _alloc_buffers[purpose]; assert(dest.is_valid(),
err_msg("Allocation buffer index out-of-bounds: " CSETSTATE_FORMAT, dest.value()));
assert(_alloc_buffers[dest.value()] != NULL,
err_msg("Allocation buffer is NULL: " CSETSTATE_FORMAT, dest.value()));
return _alloc_buffers[dest.value()];
} }
virtual void retire_alloc_buffers() ; virtual void retire_alloc_buffers() ;
......
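Note: the new g1InCSetState.hpp header included above is not part of this excerpt. The sketch below is only an approximation of the interface the allocator code relies on (Young/Old/Num constants, value() and is_valid()), meant to make the per-destination indexing of _alloc_buffers[InCSetState::Num] easier to follow; it is not the actual HotSpot definition:

// Approximate stand-in for the InCSetState value type used by the diff.
#include <cassert>
#include <cstdint>

struct InCSetStateSketch {
  enum : int8_t { NotInCSet = 0, Young = 1, Old = 2, Num = 3 };

  int8_t _value;

  explicit InCSetStateSketch(int8_t v = NotInCSet) : _value(v) {}

  int8_t value() const { return _value; }
  // Valid allocation destinations are exactly the states used to index the
  // per-destination PLAB array: Young and Old.
  bool is_valid() const { return _value == Young || _value == Old; }
};

int main() {
  void* alloc_buffers[InCSetStateSketch::Num] = {};  // one slot per state
  InCSetStateSketch dest(InCSetStateSketch::Young);
  assert(dest.is_valid());
  alloc_buffers[dest.value()] = &dest;               // index buffers by value()
  return alloc_buffers[InCSetStateSketch::Young] != nullptr ? 0 : 1;
}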
@@ -32,6 +32,7 @@
#include "gc_implementation/g1/g1AllocRegion.hpp" #include "gc_implementation/g1/g1AllocRegion.hpp"
#include "gc_implementation/g1/g1BiasedArray.hpp" #include "gc_implementation/g1/g1BiasedArray.hpp"
#include "gc_implementation/g1/g1HRPrinter.hpp" #include "gc_implementation/g1/g1HRPrinter.hpp"
#include "gc_implementation/g1/g1InCSetState.hpp"
#include "gc_implementation/g1/g1MonitoringSupport.hpp" #include "gc_implementation/g1/g1MonitoringSupport.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/g1YCTypes.hpp" #include "gc_implementation/g1/g1YCTypes.hpp"
@@ -346,6 +347,7 @@ private:
volatile unsigned int _old_marking_cycles_completed; volatile unsigned int _old_marking_cycles_completed;
bool _concurrent_cycle_started; bool _concurrent_cycle_started;
bool _heap_summary_sent;
// This is a non-product method that is helpful for testing. It is // This is a non-product method that is helpful for testing. It is
// called at the end of a GC and artificially expands the heap by // called at the end of a GC and artificially expands the heap by
@@ -545,15 +547,9 @@ protected:
// allocation region, either by picking one or expanding the // allocation region, either by picking one or expanding the
// heap, and then allocate a block of the given size. The block // heap, and then allocate a block of the given size. The block
// may not be a humongous - it must fit into a single heap region. // may not be a humongous - it must fit into a single heap region.
HeapWord* par_allocate_during_gc(GCAllocPurpose purpose, inline HeapWord* par_allocate_during_gc(InCSetState dest,
size_t word_size, size_t word_size,
AllocationContext_t context); AllocationContext_t context);
HeapWord* allocate_during_gc_slow(GCAllocPurpose purpose,
HeapRegion* alloc_region,
bool par,
size_t word_size);
// Ensure that no further allocations can happen in "r", bearing in mind // Ensure that no further allocations can happen in "r", bearing in mind
// that parallel threads might be attempting allocations. // that parallel threads might be attempting allocations.
void par_allocate_remaining_space(HeapRegion* r); void par_allocate_remaining_space(HeapRegion* r);
@@ -575,9 +571,9 @@ protected:
// For GC alloc regions. // For GC alloc regions.
HeapRegion* new_gc_alloc_region(size_t word_size, uint count, HeapRegion* new_gc_alloc_region(size_t word_size, uint count,
GCAllocPurpose ap); InCSetState dest);
void retire_gc_alloc_region(HeapRegion* alloc_region, void retire_gc_alloc_region(HeapRegion* alloc_region,
size_t allocated_bytes, GCAllocPurpose ap); size_t allocated_bytes, InCSetState dest);
// - if explicit_gc is true, the GC is for a System.gc() or a heap // - if explicit_gc is true, the GC is for a System.gc() or a heap
// inspection request and should collect the entire heap // inspection request and should collect the entire heap
@@ -638,26 +634,11 @@ public:
// (Rounds up to a HeapRegion boundary.) // (Rounds up to a HeapRegion boundary.)
bool expand(size_t expand_bytes); bool expand(size_t expand_bytes);
// Returns the PLAB statistics given a purpose. // Returns the PLAB statistics for a given destination.
PLABStats* stats_for_purpose(GCAllocPurpose purpose) { inline PLABStats* alloc_buffer_stats(InCSetState dest);
PLABStats* stats = NULL;
switch (purpose) {
case GCAllocForSurvived:
stats = &_survivor_plab_stats;
break;
case GCAllocForTenured:
stats = &_old_plab_stats;
break;
default:
assert(false, "unrecognized GCAllocPurpose");
}
return stats; // Determines PLAB size for a given destination.
} inline size_t desired_plab_sz(InCSetState dest);
// Determines PLAB size for a particular allocation purpose.
size_t desired_plab_sz(GCAllocPurpose purpose);
inline AllocationContextStats& allocation_context_stats(); inline AllocationContextStats& allocation_context_stats();
@@ -681,8 +662,11 @@ public:
void register_humongous_regions_with_in_cset_fast_test(); void register_humongous_regions_with_in_cset_fast_test();
// We register a region with the fast "in collection set" test. We // We register a region with the fast "in collection set" test. We
// simply set to true the array slot corresponding to this region. // simply set to true the array slot corresponding to this region.
void register_region_with_in_cset_fast_test(HeapRegion* r) { void register_young_region_with_in_cset_fast_test(HeapRegion* r) {
_in_cset_fast_test.set_in_cset(r->hrm_index()); _in_cset_fast_test.set_in_young(r->hrm_index());
}
void register_old_region_with_in_cset_fast_test(HeapRegion* r) {
_in_cset_fast_test.set_in_old(r->hrm_index());
} }
// This is a fast test on whether a reference points into the // This is a fast test on whether a reference points into the
@@ -812,22 +796,6 @@ protected:
// statistics or updating free lists. // statistics or updating free lists.
void abandon_collection_set(HeapRegion* cs_head); void abandon_collection_set(HeapRegion* cs_head);
// Applies "scan_non_heap_roots" to roots outside the heap,
// "scan_rs" to roots inside the heap (having done "set_region" to
// indicate the region in which the root resides),
// and does "scan_metadata" If "scan_rs" is
// NULL, then this step is skipped. The "worker_i"
// param is for use with parallel roots processing, and should be
// the "i" of the calling parallel worker thread's work(i) function.
// In the sequential case this param will be ignored.
void g1_process_roots(OopClosure* scan_non_heap_roots,
OopClosure* scan_non_heap_weak_roots,
OopsInHeapRegionClosure* scan_rs,
CLDClosure* scan_strong_clds,
CLDClosure* scan_weak_clds,
CodeBlobClosure* scan_strong_code,
uint worker_i);
// The concurrent marker (and the thread it runs in.) // The concurrent marker (and the thread it runs in.)
ConcurrentMark* _cm; ConcurrentMark* _cm;
ConcurrentMarkThread* _cmThread; ConcurrentMarkThread* _cmThread;
@@ -1014,21 +982,10 @@ protected:
// of G1CollectedHeap::_gc_time_stamp. // of G1CollectedHeap::_gc_time_stamp.
unsigned int* _worker_cset_start_region_time_stamp; unsigned int* _worker_cset_start_region_time_stamp;
enum G1H_process_roots_tasks {
G1H_PS_filter_satb_buffers,
G1H_PS_refProcessor_oops_do,
// Leave this one last.
G1H_PS_NumElements
};
SubTasksDone* _process_strong_tasks;
volatile bool _free_regions_coming; volatile bool _free_regions_coming;
public: public:
SubTasksDone* process_strong_tasks() { return _process_strong_tasks; }
void set_refine_cte_cl_concurrency(bool concurrent); void set_refine_cte_cl_concurrency(bool concurrent);
RefToScanQueue *task_queue(int i) const; RefToScanQueue *task_queue(int i) const;
@@ -1061,21 +1018,11 @@ public:
// Initialize weak reference processing. // Initialize weak reference processing.
virtual void ref_processing_init(); virtual void ref_processing_init();
void set_par_threads(uint t) { // Explicitly import set_par_threads into this scope
SharedHeap::set_par_threads(t); using SharedHeap::set_par_threads;
// Done in SharedHeap but oddly there are
// two _process_strong_tasks's in a G1CollectedHeap
// so do it here too.
_process_strong_tasks->set_n_threads(t);
}
// Set _n_par_threads according to a policy TBD. // Set _n_par_threads according to a policy TBD.
void set_par_threads(); void set_par_threads();
void set_n_termination(int t) {
_process_strong_tasks->set_n_threads(t);
}
virtual CollectedHeap::Name kind() const { virtual CollectedHeap::Name kind() const {
return CollectedHeap::G1CollectedHeap; return CollectedHeap::G1CollectedHeap;
} }
@@ -1182,6 +1129,9 @@ public:
// appropriate error messages and crash. // appropriate error messages and crash.
void check_bitmaps(const char* caller) PRODUCT_RETURN; void check_bitmaps(const char* caller) PRODUCT_RETURN;
// Do sanity check on the contents of the in-cset fast test table.
bool check_cset_fast_test() PRODUCT_RETURN_( return true; );
// verify_region_sets() performs verification over the region // verify_region_sets() performs verification over the region
// lists. It will be compiled in the product code to be used when // lists. It will be compiled in the product code to be used when
// necessary (i.e., during heap verification). // necessary (i.e., during heap verification).
@@ -1277,53 +1227,15 @@ public:
inline bool is_in_cset_or_humongous(const oop obj); inline bool is_in_cset_or_humongous(const oop obj);
enum in_cset_state_t {
InNeither, // neither in collection set nor humongous
InCSet, // region is in collection set only
IsHumongous // region is a humongous start region
};
private: private:
// Instances of this class are used for quick tests on whether a reference points
// into the collection set or is a humongous object (points into a humongous
// object).
// Each of the array's elements denotes whether the corresponding region is in
// the collection set or a humongous region.
// We use this to quickly reclaim humongous objects: by making a humongous region
// succeed this test, we sort-of add it to the collection set. During the reference
// iteration closures, when we see a humongous region, we simply mark it as
// referenced, i.e. live.
class G1FastCSetBiasedMappedArray : public G1BiasedMappedArray<char> {
protected:
char default_value() const { return G1CollectedHeap::InNeither; }
public:
void set_humongous(uintptr_t index) {
assert(get_by_index(index) != InCSet, "Should not overwrite InCSet values");
set_by_index(index, G1CollectedHeap::IsHumongous);
}
void clear_humongous(uintptr_t index) {
set_by_index(index, G1CollectedHeap::InNeither);
}
void set_in_cset(uintptr_t index) {
assert(get_by_index(index) != G1CollectedHeap::IsHumongous, "Should not overwrite IsHumongous value");
set_by_index(index, G1CollectedHeap::InCSet);
}
bool is_in_cset_or_humongous(HeapWord* addr) const { return get_by_address(addr) != G1CollectedHeap::InNeither; }
bool is_in_cset(HeapWord* addr) const { return get_by_address(addr) == G1CollectedHeap::InCSet; }
G1CollectedHeap::in_cset_state_t at(HeapWord* addr) const { return (G1CollectedHeap::in_cset_state_t)get_by_address(addr); }
void clear() { G1BiasedMappedArray<char>::clear(); }
};
// This array is used for a quick test on whether a reference points into // This array is used for a quick test on whether a reference points into
// the collection set or not. Each of the array's elements denotes whether the // the collection set or not. Each of the array's elements denotes whether the
// corresponding region is in the collection set or not. // corresponding region is in the collection set or not.
G1FastCSetBiasedMappedArray _in_cset_fast_test; G1InCSetStateFastTestBiasedMappedArray _in_cset_fast_test;
public: public:
inline in_cset_state_t in_cset_state(const oop obj); inline InCSetState in_cset_state(const oop obj);
// Return "TRUE" iff the given object address is in the reserved // Return "TRUE" iff the given object address is in the reserved
// region of g1. // region of g1.
......
@@ -35,6 +35,41 @@
#include "runtime/orderAccess.inline.hpp" #include "runtime/orderAccess.inline.hpp"
#include "utilities/taskqueue.hpp" #include "utilities/taskqueue.hpp"
PLABStats* G1CollectedHeap::alloc_buffer_stats(InCSetState dest) {
switch (dest.value()) {
case InCSetState::Young:
return &_survivor_plab_stats;
case InCSetState::Old:
return &_old_plab_stats;
default:
ShouldNotReachHere();
return NULL; // Keep some compilers happy
}
}
size_t G1CollectedHeap::desired_plab_sz(InCSetState dest) {
size_t gclab_word_size = alloc_buffer_stats(dest)->desired_plab_sz();
// Prevent humongous PLAB sizes for two reasons:
// * PLABs are allocated using a similar paths as oops, but should
// never be in a humongous region
// * Allowing humongous PLABs needlessly churns the region free lists
return MIN2(_humongous_object_threshold_in_words, gclab_word_size);
}
HeapWord* G1CollectedHeap::par_allocate_during_gc(InCSetState dest,
size_t word_size,
AllocationContext_t context) {
switch (dest.value()) {
case InCSetState::Young:
return survivor_attempt_allocation(word_size, context);
case InCSetState::Old:
return old_attempt_allocation(word_size, context);
default:
ShouldNotReachHere();
return NULL; // Keep some compilers happy
}
}
// Inline functions for G1CollectedHeap // Inline functions for G1CollectedHeap
inline AllocationContextStats& G1CollectedHeap::allocation_context_stats() { inline AllocationContextStats& G1CollectedHeap::allocation_context_stats() {
@@ -203,7 +238,7 @@ bool G1CollectedHeap::is_in_cset_or_humongous(const oop obj) {
return _in_cset_fast_test.is_in_cset_or_humongous((HeapWord*)obj); return _in_cset_fast_test.is_in_cset_or_humongous((HeapWord*)obj);
} }
G1CollectedHeap::in_cset_state_t G1CollectedHeap::in_cset_state(const oop obj) { InCSetState G1CollectedHeap::in_cset_state(const oop obj) {
return _in_cset_fast_test.at((HeapWord*)obj); return _in_cset_fast_test.at((HeapWord*)obj);
} }
......
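Note on desired_plab_sz() above: clamping against _humongous_object_threshold_in_words keeps a statistics-driven PLAB size from ever reaching humongous size (in G1, an object of at least half a region is humongous). A worked example with assumed numbers:

// Illustration of the PLAB size clamp, using an assumed 1 MB region and
// 8-byte heap words; the real threshold comes from the configured region size.
#include <algorithm>
#include <cstddef>
#include <cstdio>

int main() {
  const size_t region_words = 1 * 1024 * 1024 / 8;            // 131072 words
  const size_t humongous_threshold_words = region_words / 2;  // 65536 words
  size_t desired_from_stats = 200000;  // hypothetical PLABStats::desired_plab_sz() output
  size_t plab_words = std::min(humongous_threshold_words, desired_from_stats);
  std::printf("%zu\n", plab_words);    // 65536: clamped below the humongous threshold
  return 0;
}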
@@ -1084,7 +1084,7 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua
if (update_stats) { if (update_stats) {
double cost_per_card_ms = 0.0; double cost_per_card_ms = 0.0;
if (_pending_cards > 0) { if (_pending_cards > 0) {
cost_per_card_ms = phase_times()->average_last_update_rs_time() / (double) _pending_cards; cost_per_card_ms = phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) / (double) _pending_cards;
_cost_per_card_ms_seq->add(cost_per_card_ms); _cost_per_card_ms_seq->add(cost_per_card_ms);
} }
@@ -1092,7 +1092,7 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua
double cost_per_entry_ms = 0.0; double cost_per_entry_ms = 0.0;
if (cards_scanned > 10) { if (cards_scanned > 10) {
cost_per_entry_ms = phase_times()->average_last_scan_rs_time() / (double) cards_scanned; cost_per_entry_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) / (double) cards_scanned;
if (_last_gc_was_young) { if (_last_gc_was_young) {
_cost_per_entry_ms_seq->add(cost_per_entry_ms); _cost_per_entry_ms_seq->add(cost_per_entry_ms);
} else { } else {
@@ -1134,7 +1134,7 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua
double cost_per_byte_ms = 0.0; double cost_per_byte_ms = 0.0;
if (copied_bytes > 0) { if (copied_bytes > 0) {
cost_per_byte_ms = phase_times()->average_last_obj_copy_time() / (double) copied_bytes; cost_per_byte_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) / (double) copied_bytes;
if (_in_marking_window) { if (_in_marking_window) {
_cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms); _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
} else { } else {
@@ -1143,8 +1143,8 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua
} }
double all_other_time_ms = pause_time_ms - double all_other_time_ms = pause_time_ms -
(phase_times()->average_last_update_rs_time() + phase_times()->average_last_scan_rs_time() (phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) + phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) +
+ phase_times()->average_last_obj_copy_time() + phase_times()->average_last_termination_time()); phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) + phase_times()->average_time_ms(G1GCPhaseTimes::Termination));
double young_other_time_ms = 0.0; double young_other_time_ms = 0.0;
if (young_cset_region_length() > 0) { if (young_cset_region_length() > 0) {
@@ -1185,8 +1185,8 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua
// Note that _mmu_tracker->max_gc_time() returns the time in seconds. // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
adjust_concurrent_refinement(phase_times()->average_last_update_rs_time(), adjust_concurrent_refinement(phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS),
phase_times()->sum_last_update_rs_processed_buffers(), update_rs_time_goal_ms); phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), update_rs_time_goal_ms);
_collectionSetChooser->verify(); _collectionSetChooser->verify();
} }
@@ -1437,18 +1437,6 @@ bool G1CollectorPolicy::can_expand_young_list() {
return young_list_length < young_list_max_length; return young_list_length < young_list_max_length;
} }
uint G1CollectorPolicy::max_regions(int purpose) {
switch (purpose) {
case GCAllocForSurvived:
return _max_survivor_regions;
case GCAllocForTenured:
return REGIONS_UNLIMITED;
default:
ShouldNotReachHere();
return REGIONS_UNLIMITED;
};
}
void G1CollectorPolicy::update_max_gc_locker_expansion() { void G1CollectorPolicy::update_max_gc_locker_expansion() {
uint expansion_region_num = 0; uint expansion_region_num = 0;
if (GCLockerEdenExpansionPercent > 0) { if (GCLockerEdenExpansionPercent > 0) {
@@ -1683,7 +1671,7 @@ void G1CollectorPolicy::add_old_region_to_cset(HeapRegion* hr) {
hr->set_next_in_collection_set(_collection_set); hr->set_next_in_collection_set(_collection_set);
_collection_set = hr; _collection_set = hr;
_collection_set_bytes_used_before += hr->used(); _collection_set_bytes_used_before += hr->used();
_g1->register_region_with_in_cset_fast_test(hr); _g1->register_old_region_with_in_cset_fast_test(hr);
size_t rs_length = hr->rem_set()->occupied(); size_t rs_length = hr->rem_set()->occupied();
_recorded_rs_lengths += rs_length; _recorded_rs_lengths += rs_length;
_old_cset_region_length += 1; _old_cset_region_length += 1;
@@ -1816,7 +1804,7 @@ void G1CollectorPolicy::add_region_to_incremental_cset_common(HeapRegion* hr) {
hr->set_in_collection_set(true); hr->set_in_collection_set(true);
assert( hr->next_in_collection_set() == NULL, "invariant"); assert( hr->next_in_collection_set() == NULL, "invariant");
_g1->register_region_with_in_cset_fast_test(hr); _g1->register_young_region_with_in_cset_fast_test(hr);
} }
// Add the region at the RHS of the incremental cset // Add the region at the RHS of the incremental cset
@@ -2189,19 +2177,19 @@ void TraceGen0TimeData::record_end_collection(double pause_time_ms, G1GCPhaseTim
_other.add(pause_time_ms - phase_times->accounted_time_ms()); _other.add(pause_time_ms - phase_times->accounted_time_ms());
_root_region_scan_wait.add(phase_times->root_region_scan_wait_time_ms()); _root_region_scan_wait.add(phase_times->root_region_scan_wait_time_ms());
_parallel.add(phase_times->cur_collection_par_time_ms()); _parallel.add(phase_times->cur_collection_par_time_ms());
_ext_root_scan.add(phase_times->average_last_ext_root_scan_time()); _ext_root_scan.add(phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan));
_satb_filtering.add(phase_times->average_last_satb_filtering_times_ms()); _satb_filtering.add(phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering));
_update_rs.add(phase_times->average_last_update_rs_time()); _update_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS));
_scan_rs.add(phase_times->average_last_scan_rs_time()); _scan_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::ScanRS));
_obj_copy.add(phase_times->average_last_obj_copy_time()); _obj_copy.add(phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy));
_termination.add(phase_times->average_last_termination_time()); _termination.add(phase_times->average_time_ms(G1GCPhaseTimes::Termination));
double parallel_known_time = phase_times->average_last_ext_root_scan_time() + double parallel_known_time = phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan) +
phase_times->average_last_satb_filtering_times_ms() + phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering) +
phase_times->average_last_update_rs_time() + phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS) +
phase_times->average_last_scan_rs_time() + phase_times->average_time_ms(G1GCPhaseTimes::ScanRS) +
phase_times->average_last_obj_copy_time() + phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy) +
+ phase_times->average_last_termination_time(); phase_times->average_time_ms(G1GCPhaseTimes::Termination);
double parallel_other_time = phase_times->cur_collection_par_time_ms() - parallel_known_time; double parallel_other_time = phase_times->cur_collection_par_time_ms() - parallel_known_time;
_parallel_other.add(parallel_other_time); _parallel_other.add(parallel_other_time);
......
@@ -877,28 +877,20 @@ private:
public: public:
uint tenuring_threshold() const { return _tenuring_threshold; } uint tenuring_threshold() const { return _tenuring_threshold; }
inline GCAllocPurpose
evacuation_destination(HeapRegion* src_region, uint age, size_t word_sz) {
if (age < _tenuring_threshold && src_region->is_young()) {
return GCAllocForSurvived;
} else {
return GCAllocForTenured;
}
}
inline bool track_object_age(GCAllocPurpose purpose) {
return purpose == GCAllocForSurvived;
}
static const uint REGIONS_UNLIMITED = (uint) -1; static const uint REGIONS_UNLIMITED = (uint) -1;
uint max_regions(int purpose); uint max_regions(InCSetState dest) {
switch (dest.value()) {
// The limit on regions for a particular purpose is reached. case InCSetState::Young:
void note_alloc_region_limit_reached(int purpose) { return _max_survivor_regions;
if (purpose == GCAllocForSurvived) { case InCSetState::Old:
_tenuring_threshold = 0; return REGIONS_UNLIMITED;
default:
assert(false, err_msg("Unknown dest state: " CSETSTATE_FORMAT, dest.value()));
break;
} }
// keep some compilers happy
return 0;
} }
void note_start_adding_survivor_regions() { void note_start_adding_survivor_regions() {
......
...@@ -26,106 +26,60 @@ ...@@ -26,106 +26,60 @@
#define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP #define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
#include "memory/allocation.hpp" #include "memory/allocation.hpp"
#include "gc_interface/gcCause.hpp"
template <class T> class LineBuffer;
class WorkerDataArray : public CHeapObj<mtGC> {
T* _data;
uint _length;
const char* _print_format;
bool _print_sum;
NOT_PRODUCT(static const T _uninitialized;) template <class T> class WorkerDataArray;
// We are caching the sum and average to only have to calculate them once.
// This is not done in an MT-safe way. It is intended to allow single
// threaded code to call sum() and average() multiple times in any order
// without having to worry about the cost.
bool _has_new_data;
T _sum;
double _average;
public:
WorkerDataArray(uint length, const char* print_format, bool print_sum = true) :
_length(length), _print_format(print_format), _print_sum(print_sum), _has_new_data(true) {
assert(length > 0, "Must have some workers to store data for");
_data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
}
~WorkerDataArray() {
FREE_C_HEAP_ARRAY(T, _data, mtGC);
}
void set(uint worker_i, T value) {
assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
assert(_data[worker_i] == (T)-1, err_msg("Overwriting data for worker %d", worker_i));
_data[worker_i] = value;
_has_new_data = true;
}
T get(uint worker_i) {
assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i));
return _data[worker_i];
}
void add(uint worker_i, T value) {
assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i));
_data[worker_i] += value;
_has_new_data = true;
}
double average(){
if (_has_new_data) {
calculate_totals();
}
return _average;
}
T sum() {
if (_has_new_data) {
calculate_totals();
}
return _sum;
}
void print(int level, const char* title);
void reset() PRODUCT_RETURN;
void verify() PRODUCT_RETURN;
private:
void calculate_totals(){
_sum = (T)0;
for (uint i = 0; i < _length; ++i) {
_sum += _data[i];
}
_average = (double)_sum / (double)_length;
_has_new_data = false;
}
};
class G1GCPhaseTimes : public CHeapObj<mtGC> { class G1GCPhaseTimes : public CHeapObj<mtGC> {
friend class G1GCParPhasePrinter;
private:
uint _active_gc_threads; uint _active_gc_threads;
uint _max_gc_threads; uint _max_gc_threads;
WorkerDataArray<double> _last_gc_worker_start_times_ms; public:
WorkerDataArray<double> _last_ext_root_scan_times_ms; enum GCParPhases {
WorkerDataArray<double> _last_satb_filtering_times_ms; GCWorkerStart,
WorkerDataArray<double> _last_update_rs_times_ms; ExtRootScan,
WorkerDataArray<int> _last_update_rs_processed_buffers; ThreadRoots,
WorkerDataArray<double> _last_scan_rs_times_ms; StringTableRoots,
WorkerDataArray<double> _last_strong_code_root_scan_times_ms; UniverseRoots,
WorkerDataArray<double> _last_obj_copy_times_ms; JNIRoots,
WorkerDataArray<double> _last_termination_times_ms; ObjectSynchronizerRoots,
WorkerDataArray<size_t> _last_termination_attempts; FlatProfilerRoots,
WorkerDataArray<double> _last_gc_worker_end_times_ms; ManagementRoots,
WorkerDataArray<double> _last_gc_worker_times_ms; SystemDictionaryRoots,
WorkerDataArray<double> _last_gc_worker_other_times_ms; CLDGRoots,
JVMTIRoots,
CodeCacheRoots,
CMRefRoots,
WaitForStrongCLD,
WeakCLDRoots,
SATBFiltering,
UpdateRS,
ScanRS,
CodeRoots,
ObjCopy,
Termination,
Other,
GCWorkerTotal,
GCWorkerEnd,
StringDedupQueueFixup,
StringDedupTableFixup,
RedirtyCards,
GCParPhasesSentinel
};
private:
// Markers for grouping the phases in the GCPhases enum above
static const int GCMainParPhasesLast = GCWorkerEnd;
static const int StringDedupPhasesFirst = StringDedupQueueFixup;
static const int StringDedupPhasesLast = StringDedupTableFixup;
WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel];
WorkerDataArray<size_t>* _update_rs_processed_buffers;
WorkerDataArray<size_t>* _termination_attempts;
WorkerDataArray<size_t>* _redirtied_cards;
double _cur_collection_par_time_ms; double _cur_collection_par_time_ms;
double _cur_collection_code_root_fixup_time_ms; double _cur_collection_code_root_fixup_time_ms;
...@@ -135,9 +89,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { ...@@ -135,9 +89,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
double _cur_evac_fail_restore_remsets; double _cur_evac_fail_restore_remsets;
double _cur_evac_fail_remove_self_forwards; double _cur_evac_fail_remove_self_forwards;
double _cur_string_dedup_fixup_time_ms; double _cur_string_dedup_fixup_time_ms;
WorkerDataArray<double> _cur_string_dedup_queue_fixup_worker_times_ms;
WorkerDataArray<double> _cur_string_dedup_table_fixup_worker_times_ms;
double _cur_clear_ct_time_ms; double _cur_clear_ct_time_ms;
double _cur_ref_proc_time_ms; double _cur_ref_proc_time_ms;
...@@ -149,8 +101,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { ...@@ -149,8 +101,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
double _recorded_young_cset_choice_time_ms; double _recorded_young_cset_choice_time_ms;
double _recorded_non_young_cset_choice_time_ms; double _recorded_non_young_cset_choice_time_ms;
WorkerDataArray<double> _last_redirty_logged_cards_time_ms;
WorkerDataArray<size_t> _last_redirty_logged_cards_processed_cards;
double _recorded_redirty_logged_cards_time_ms; double _recorded_redirty_logged_cards_time_ms;
double _recorded_young_free_cset_time_ms; double _recorded_young_free_cset_time_ms;
...@@ -171,54 +121,34 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { ...@@ -171,54 +121,34 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
public: public:
G1GCPhaseTimes(uint max_gc_threads); G1GCPhaseTimes(uint max_gc_threads);
void note_gc_start(uint active_gc_threads); void note_gc_start(uint active_gc_threads, bool mark_in_progress);
void note_gc_end(); void note_gc_end();
void print(double pause_time_sec); void print(double pause_time_sec);
void record_gc_worker_start_time(uint worker_i, double ms) { // record the time a phase took in seconds
_last_gc_worker_start_times_ms.set(worker_i, ms); void record_time_secs(GCParPhases phase, uint worker_i, double secs);
}
void record_ext_root_scan_time(uint worker_i, double ms) {
_last_ext_root_scan_times_ms.set(worker_i, ms);
}
void record_satb_filtering_time(uint worker_i, double ms) {
_last_satb_filtering_times_ms.set(worker_i, ms);
}
void record_update_rs_time(uint worker_i, double ms) {
_last_update_rs_times_ms.set(worker_i, ms);
}
void record_update_rs_processed_buffers(uint worker_i, int processed_buffers) {
_last_update_rs_processed_buffers.set(worker_i, processed_buffers);
}
void record_scan_rs_time(uint worker_i, double ms) { // add a number of seconds to a phase
_last_scan_rs_times_ms.set(worker_i, ms); void add_time_secs(GCParPhases phase, uint worker_i, double secs);
}
void record_strong_code_root_scan_time(uint worker_i, double ms) { void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count);
_last_strong_code_root_scan_times_ms.set(worker_i, ms);
}
void record_obj_copy_time(uint worker_i, double ms) { // return the average time for a phase in milliseconds
_last_obj_copy_times_ms.set(worker_i, ms); double average_time_ms(GCParPhases phase);
}
void add_obj_copy_time(uint worker_i, double ms) { size_t sum_thread_work_items(GCParPhases phase);
_last_obj_copy_times_ms.add(worker_i, ms);
}
void record_termination(uint worker_i, double ms, size_t attempts) { private:
_last_termination_times_ms.set(worker_i, ms); double get_time_ms(GCParPhases phase, uint worker_i);
_last_termination_attempts.set(worker_i, attempts); double sum_time_ms(GCParPhases phase);
} double min_time_ms(GCParPhases phase);
double max_time_ms(GCParPhases phase);
size_t get_thread_work_item(GCParPhases phase, uint worker_i);
double average_thread_work_items(GCParPhases phase);
size_t min_thread_work_items(GCParPhases phase);
size_t max_thread_work_items(GCParPhases phase);
void record_gc_worker_end_time(uint worker_i, double ms) { public:
_last_gc_worker_end_times_ms.set(worker_i, ms);
}
void record_clear_ct_time(double ms) { void record_clear_ct_time(double ms) {
_cur_clear_ct_time_ms = ms; _cur_clear_ct_time_ms = ms;
...@@ -248,21 +178,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { ...@@ -248,21 +178,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
_cur_evac_fail_remove_self_forwards = ms; _cur_evac_fail_remove_self_forwards = ms;
} }
void note_string_dedup_fixup_start();
void note_string_dedup_fixup_end();
void record_string_dedup_fixup_time(double ms) { void record_string_dedup_fixup_time(double ms) {
_cur_string_dedup_fixup_time_ms = ms; _cur_string_dedup_fixup_time_ms = ms;
} }
void record_string_dedup_queue_fixup_worker_time(uint worker_id, double ms) {
_cur_string_dedup_queue_fixup_worker_times_ms.set(worker_id, ms);
}
void record_string_dedup_table_fixup_worker_time(uint worker_id, double ms) {
_cur_string_dedup_table_fixup_worker_times_ms.set(worker_id, ms);
}
void record_ref_proc_time(double ms) { void record_ref_proc_time(double ms) {
_cur_ref_proc_time_ms = ms; _cur_ref_proc_time_ms = ms;
} }
...@@ -301,14 +220,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { ...@@ -301,14 +220,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
_recorded_non_young_cset_choice_time_ms = time_ms; _recorded_non_young_cset_choice_time_ms = time_ms;
} }
void record_redirty_logged_cards_time_ms(uint worker_i, double time_ms) {
_last_redirty_logged_cards_time_ms.set(worker_i, time_ms);
}
void record_redirty_logged_cards_processed_cards(uint worker_i, size_t processed_buffers) {
_last_redirty_logged_cards_processed_cards.set(worker_i, processed_buffers);
}
void record_redirty_logged_cards_time_ms(double time_ms) { void record_redirty_logged_cards_time_ms(double time_ms) {
_recorded_redirty_logged_cards_time_ms = time_ms; _recorded_redirty_logged_cards_time_ms = time_ms;
} }
...@@ -362,38 +273,16 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { ...@@ -362,38 +273,16 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
double fast_reclaim_humongous_time_ms() { double fast_reclaim_humongous_time_ms() {
return _cur_fast_reclaim_humongous_time_ms; return _cur_fast_reclaim_humongous_time_ms;
} }
};
double average_last_update_rs_time() { class G1GCParPhaseTimesTracker : public StackObj {
return _last_update_rs_times_ms.average(); double _start_time;
} G1GCPhaseTimes::GCParPhases _phase;
G1GCPhaseTimes* _phase_times;
int sum_last_update_rs_processed_buffers() { uint _worker_id;
return _last_update_rs_processed_buffers.sum(); public:
} G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id);
~G1GCParPhaseTimesTracker();
double average_last_scan_rs_time(){
return _last_scan_rs_times_ms.average();
}
double average_last_strong_code_root_scan_time(){
return _last_strong_code_root_scan_times_ms.average();
}
double average_last_obj_copy_time() {
return _last_obj_copy_times_ms.average();
}
double average_last_termination_time() {
return _last_termination_times_ms.average();
}
double average_last_ext_root_scan_time() {
return _last_ext_root_scan_times_ms.average();
}
double average_last_satb_filtering_times_ms() {
return _last_satb_filtering_times_ms.average();
}
}; };
#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
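G1GCParPhaseTimesTracker, declared just above, is an RAII timer: the constructor notes the start time and the destructor adds the elapsed time to the given phase for the given worker, so a phase is timed by placing one stack object at the top of its scope. A hedged sketch of that idiom using std::chrono (the real code uses os::elapsedTime and the record_time_secs/add_time_secs calls declared above):

#include <chrono>
#include <cstddef>

class ScopedPhaseTimer {
  PhaseTimes*  _pt;        // the PhaseTimes sketch shown earlier
  GCParPhase   _phase;
  size_t       _worker_i;
  std::chrono::steady_clock::time_point _start;
public:
  ScopedPhaseTimer(PhaseTimes* pt, GCParPhase phase, size_t worker_i)
    : _pt(pt), _phase(phase), _worker_i(worker_i),
      _start(std::chrono::steady_clock::now()) {}
  ~ScopedPhaseTimer() {
    std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - _start;
    _pt->record_time_secs(_phase, _worker_i, elapsed.count());  // whole scope attributed to the phase
  }
};

// Usage, mirroring how updateRS() is timed further down in this commit:
//   void updateRS(PhaseTimes* pt, size_t worker_i) {
//     ScopedPhaseTimer t(pt, UpdateRS, worker_i);
//     ... apply the refinement closure to the dirty card queues ...
//   }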
/* /*
* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -36,16 +36,13 @@ void G1HotCardCache::initialize(G1RegionToSpaceMapper* card_counts_storage) { ...@@ -36,16 +36,13 @@ void G1HotCardCache::initialize(G1RegionToSpaceMapper* card_counts_storage) {
if (default_use_cache()) { if (default_use_cache()) {
_use_cache = true; _use_cache = true;
_hot_cache_size = (1 << G1ConcRSLogCacheSize); _hot_cache_size = (size_t)1 << G1ConcRSLogCacheSize;
_hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC); _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC);
_n_hot = 0; reset_hot_cache_internal();
_hot_cache_idx = 0;
// For refining the cards in the hot cache in parallel // For refining the cards in the hot cache in parallel
uint n_workers = (ParallelGCThreads > 0 ? _hot_cache_par_chunk_size = (int)(ParallelGCThreads > 0 ? ClaimChunkSize : _hot_cache_size);
_g1h->workers()->total_workers() : 1);
_hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / (int)n_workers);
_hot_cache_par_claimed_idx = 0; _hot_cache_par_claimed_idx = 0;
_card_counts.initialize(card_counts_storage); _card_counts.initialize(card_counts_storage);
...@@ -66,26 +63,21 @@ jbyte* G1HotCardCache::insert(jbyte* card_ptr) { ...@@ -66,26 +63,21 @@ jbyte* G1HotCardCache::insert(jbyte* card_ptr) {
// return it for immediate refining. // return it for immediate refining.
return card_ptr; return card_ptr;
} }
// Otherwise, the card is hot. // Otherwise, the card is hot.
jbyte* res = NULL; size_t index = Atomic::add_ptr((intptr_t)1, (volatile intptr_t*)&_hot_cache_idx) - 1;
MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); size_t masked_index = index & (_hot_cache_size - 1);
if (_n_hot == _hot_cache_size) { jbyte* current_ptr = _hot_cache[masked_index];
res = _hot_cache[_hot_cache_idx];
_n_hot--; // Try to store the new card pointer into the cache. Compare-and-swap to guard
} // against the unlikely event of a race resulting in another card pointer to
// have already been written to the cache. In this case we will return
// Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. // card_ptr in favor of the other option, which would be starting over. This
_hot_cache[_hot_cache_idx] = card_ptr; // should be OK since card_ptr will likely be the older card already when/if
_hot_cache_idx++; // this ever happens.
jbyte* previous_ptr = (jbyte*)Atomic::cmpxchg_ptr(card_ptr,
if (_hot_cache_idx == _hot_cache_size) { &_hot_cache[masked_index],
// Wrap around current_ptr);
_hot_cache_idx = 0; return (previous_ptr == current_ptr) ? previous_ptr : card_ptr;
}
_n_hot++;
return res;
} }
void G1HotCardCache::drain(uint worker_i, void G1HotCardCache::drain(uint worker_i,
...@@ -98,38 +90,37 @@ void G1HotCardCache::drain(uint worker_i, ...@@ -98,38 +90,37 @@ void G1HotCardCache::drain(uint worker_i,
assert(_hot_cache != NULL, "Logic"); assert(_hot_cache != NULL, "Logic");
assert(!use_cache(), "cache should be disabled"); assert(!use_cache(), "cache should be disabled");
int start_idx; while (_hot_cache_par_claimed_idx < _hot_cache_size) {
size_t end_idx = Atomic::add_ptr((intptr_t)_hot_cache_par_chunk_size,
while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once (volatile intptr_t*)&_hot_cache_par_claimed_idx);
int end_idx = start_idx + _hot_cache_par_chunk_size; size_t start_idx = end_idx - _hot_cache_par_chunk_size;
// The current worker has successfully claimed the chunk [start_idx..end_idx)
if (start_idx == end_idx = MIN2(end_idx, _hot_cache_size);
Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { for (size_t i = start_idx; i < end_idx; i++) {
// The current worker has successfully claimed the chunk [start_idx..end_idx) jbyte* card_ptr = _hot_cache[i];
end_idx = MIN2(end_idx, _n_hot); if (card_ptr != NULL) {
for (int i = start_idx; i < end_idx; i++) { if (g1rs->refine_card(card_ptr, worker_i, true)) {
jbyte* card_ptr = _hot_cache[i]; // The part of the heap spanned by the card contains references
if (card_ptr != NULL) { // that point into the current collection set.
if (g1rs->refine_card(card_ptr, worker_i, true)) { // We need to record the card pointer in the DirtyCardQueueSet
// The part of the heap spanned by the card contains references // that we use for such cards.
// that point into the current collection set. //
// We need to record the card pointer in the DirtyCardQueueSet // The only time we care about recording cards that contain
// that we use for such cards. // references that point into the collection set is during
// // RSet updating while within an evacuation pause.
// The only time we care about recording cards that contain // In this case worker_i should be the id of a GC worker thread
// references that point into the collection set is during assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
// RSet updating while within an evacuation pause. assert(worker_i < ParallelGCThreads,
// In this case worker_i should be the id of a GC worker thread err_msg("incorrect worker id: %u", worker_i));
assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
assert(worker_i < (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), into_cset_dcq->enqueue(card_ptr);
err_msg("incorrect worker id: "UINT32_FORMAT, worker_i));
into_cset_dcq->enqueue(card_ptr);
}
} }
} else {
break;
} }
} }
} }
// The existing entries in the hot card cache, which were just refined // The existing entries in the hot card cache, which were just refined
// above, are discarded prior to re-enabling the cache near the end of the GC. // above, are discarded prior to re-enabling the cache near the end of the GC.
} }
......
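The rewritten insert() above drops the HotCardCache_lock: a fetch-and-add on _hot_cache_idx claims a slot (masked into the power-of-two table) and a compare-and-swap publishes the new card, with the evicted entry handed back to the caller for immediate refinement. A simplified, self-contained sketch of that lock-free ring-buffer technique using std::atomic (the real code uses Atomic::add_ptr and Atomic::cmpxchg_ptr on jbyte*):

#include <atomic>
#include <cstddef>
#include <vector>

class HotCache {
  std::vector<std::atomic<const void*> > _slots;   // power-of-two sized table
  std::atomic<size_t> _next_idx{0};
public:
  explicit HotCache(size_t size_pow2) : _slots(size_pow2) {
    for (size_t i = 0; i < _slots.size(); i++) _slots[i].store(nullptr, std::memory_order_relaxed);
  }
  // Returns the entry evicted from the claimed slot (possibly null),
  // or `card` itself if another thread raced us into the same slot.
  const void* insert(const void* card) {
    size_t index  = _next_idx.fetch_add(1, std::memory_order_relaxed);
    size_t masked = index & (_slots.size() - 1);
    const void* current = _slots[masked].load(std::memory_order_relaxed);
    // Publish the new card; on a (rare) lost race keep the other writer's
    // value and hand our card back for immediate processing.
    if (_slots[masked].compare_exchange_strong(current, card)) {
      return current;   // evicted (older) entry, may be null
    }
    return card;
  }
};

Losing the CAS race is treated like a cache miss: the caller refines its own card right away, which is what the comment in the new code describes.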
/* /*
* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -54,21 +54,33 @@ class HeapRegion; ...@@ -54,21 +54,33 @@ class HeapRegion;
// code, increasing throughput. // code, increasing throughput.
class G1HotCardCache: public CHeapObj<mtGC> { class G1HotCardCache: public CHeapObj<mtGC> {
G1CollectedHeap* _g1h;
G1CollectedHeap* _g1h;
bool _use_cache;
G1CardCounts _card_counts;
// The card cache table // The card cache table
jbyte** _hot_cache; jbyte** _hot_cache;
size_t _hot_cache_size;
int _hot_cache_par_chunk_size;
int _hot_cache_size; // Avoids false sharing when concurrently updating _hot_cache_idx or
int _n_hot; // _hot_cache_par_claimed_idx. These are never updated at the same time
int _hot_cache_idx; // thus it's not necessary to separate them as well
char _pad_before[DEFAULT_CACHE_LINE_SIZE];
int _hot_cache_par_chunk_size; volatile size_t _hot_cache_idx;
volatile int _hot_cache_par_claimed_idx;
bool _use_cache; volatile size_t _hot_cache_par_claimed_idx;
G1CardCounts _card_counts; char _pad_after[DEFAULT_CACHE_LINE_SIZE];
// The number of cached cards a thread claims when flushing the cache
static const int ClaimChunkSize = 32;
bool default_use_cache() const { bool default_use_cache() const {
return (G1ConcRSLogCacheSize > 0); return (G1ConcRSLogCacheSize > 0);
...@@ -110,16 +122,25 @@ class G1HotCardCache: public CHeapObj<mtGC> { ...@@ -110,16 +122,25 @@ class G1HotCardCache: public CHeapObj<mtGC> {
void reset_hot_cache() { void reset_hot_cache() {
assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread"); assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread");
_hot_cache_idx = 0; _n_hot = 0; if (default_use_cache()) {
reset_hot_cache_internal();
}
} }
bool hot_cache_is_empty() { return _n_hot == 0; }
// Zeros the values in the card counts table for entire committed heap // Zeros the values in the card counts table for entire committed heap
void reset_card_counts(); void reset_card_counts();
// Zeros the values in the card counts table for the given region // Zeros the values in the card counts table for the given region
void reset_card_counts(HeapRegion* hr); void reset_card_counts(HeapRegion* hr);
private:
void reset_hot_cache_internal() {
assert(_hot_cache != NULL, "Logic");
_hot_cache_idx = 0;
for (size_t i = 0; i < _hot_cache_size; i++) {
_hot_cache[i] = NULL;
}
}
}; };
#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1HOTCARDCACHE_HPP #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1HOTCARDCACHE_HPP
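The _pad_before/_pad_after members above exist only to keep the two frequently updated indices away from neighbouring fields, so that insertion (bumping _hot_cache_idx) and draining (bumping _hot_cache_par_claimed_idx) do not cause false sharing with unrelated data. A small sketch of the same layout trick, assuming a 64-byte cache line (the real code sizes the pads with DEFAULT_CACHE_LINE_SIZE):

#include <cstddef>

static const size_t kCacheLineSize = 64;     // assumed; platform dependent in practice

struct HotCacheIndices {
  char pad_before[kCacheLineSize];           // keep preceding members off this cache line
  volatile size_t hot_cache_idx;             // bumped by threads inserting cards
  volatile size_t par_claimed_idx;           // bumped by threads draining the cache
  char pad_after[kCacheLineSize];            // keep following members off this cache line
};

As the comment in the header notes, the two indices may share a line because they are never updated at the same time; the padding only isolates them from the rest of the object.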
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1INCSETSTATE_HPP
#define SHARE_VM_GC_IMPLEMENTATION_G1_G1INCSETSTATE_HPP
#include "gc_implementation/g1/g1BiasedArray.hpp"
#include "memory/allocation.hpp"
// Per-region state during garbage collection.
struct InCSetState {
public:
// We use different types to represent the state value. Particularly SPARC puts
// values in structs from "left to right", i.e. MSB to LSB. This results in many
// unnecessary shift operations when loading and storing values of this type.
// This degrades performance significantly (>10%) on that platform.
// Other tested ABIs do not seem to have this problem, and actually tend to
// favor smaller types, so we use the smallest usable type there.
#ifdef SPARC
#define CSETSTATE_FORMAT INTPTR_FORMAT
typedef intptr_t in_cset_state_t;
#else
#define CSETSTATE_FORMAT "%d"
typedef int8_t in_cset_state_t;
#endif
private:
in_cset_state_t _value;
public:
enum {
    // Selection of the values was driven to micro-optimize the encoding and
// frequency of the checks.
// The most common check is whether the region is in the collection set or not.
// This encoding allows us to use an != 0 check which in some architectures
    // (x86*) can be encoded slightly more efficiently than a normal comparison
// against zero.
// The same situation occurs when checking whether the region is humongous
// or not, which is encoded by values < 0.
// The other values are simply encoded in increasing generation order, which
// makes getting the next generation fast by a simple increment.
Humongous = -1, // The region is humongous - note that actually any value < 0 would be possible here.
NotInCSet = 0, // The region is not in the collection set.
Young = 1, // The region is in the collection set and a young region.
Old = 2, // The region is in the collection set and an old region.
Num
};
InCSetState(in_cset_state_t value = NotInCSet) : _value(value) {
assert(is_valid(), err_msg("Invalid state %d", _value));
}
in_cset_state_t value() const { return _value; }
void set_old() { _value = Old; }
bool is_in_cset_or_humongous() const { return _value != NotInCSet; }
bool is_in_cset() const { return _value > NotInCSet; }
bool is_humongous() const { return _value < NotInCSet; }
bool is_young() const { return _value == Young; }
bool is_old() const { return _value == Old; }
#ifdef ASSERT
bool is_default() const { return !is_in_cset_or_humongous(); }
bool is_valid() const { return (_value >= Humongous) && (_value < Num); }
bool is_valid_gen() const { return (_value >= Young && _value <= Old); }
#endif
};
// Instances of this class are used for quick tests on whether a reference points
// into the collection set and into which generation or is a humongous object
//
// Each of the array's elements indicates whether the corresponding region is in
// the collection set and if so in which generation, or a humongous region.
//
// We use this to speed up reference processing during young collection and
// quickly reclaim humongous objects. For the latter, by making a humongous region
// succeed this test, we sort-of add it to the collection set. During the reference
// iteration closures, when we see a humongous region, we then simply mark it as
// referenced, i.e. live.
class G1InCSetStateFastTestBiasedMappedArray : public G1BiasedMappedArray<InCSetState> {
protected:
InCSetState default_value() const { return InCSetState::NotInCSet; }
public:
void set_humongous(uintptr_t index) {
assert(get_by_index(index).is_default(),
err_msg("State at index " INTPTR_FORMAT" should be default but is " CSETSTATE_FORMAT, index, get_by_index(index).value()));
set_by_index(index, InCSetState::Humongous);
}
void clear_humongous(uintptr_t index) {
set_by_index(index, InCSetState::NotInCSet);
}
void set_in_young(uintptr_t index) {
assert(get_by_index(index).is_default(),
err_msg("State at index " INTPTR_FORMAT" should be default but is " CSETSTATE_FORMAT, index, get_by_index(index).value()));
set_by_index(index, InCSetState::Young);
}
void set_in_old(uintptr_t index) {
assert(get_by_index(index).is_default(),
err_msg("State at index " INTPTR_FORMAT" should be default but is " CSETSTATE_FORMAT, index, get_by_index(index).value()));
set_by_index(index, InCSetState::Old);
}
bool is_in_cset_or_humongous(HeapWord* addr) const { return at(addr).is_in_cset_or_humongous(); }
bool is_in_cset(HeapWord* addr) const { return at(addr).is_in_cset(); }
InCSetState at(HeapWord* addr) const { return get_by_address(addr); }
void clear() { G1BiasedMappedArray<InCSetState>::clear(); }
};
#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1INCSETSTATE_HPP
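The comments above explain why the state values are what they are: the hottest check (in the collection set or humongous?) becomes a compare against zero, humongous is any negative value, and young/old are consecutive so the next generation is a simple increment. A hedged illustration of how those checks fall out of that encoding:

#include <cassert>
#include <stdint.h>

typedef int8_t in_cset_state_t;
enum { Humongous = -1, NotInCSet = 0, Young = 1, Old = 2 };

inline bool is_in_cset_or_humongous(in_cset_state_t v) { return v != 0; }   // single test against zero
inline bool is_in_cset(in_cset_state_t v)              { return v > 0;  }
inline bool is_humongous(in_cset_state_t v)            { return v < 0;  }

// "Next generation" is just an increment thanks to Young + 1 == Old.
inline in_cset_state_t next_gen(in_cset_state_t v) {
  assert(v == Young || v == Old);
  return (v == Old) ? (in_cset_state_t)Old : (in_cset_state_t)(v + 1);
}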
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "memory/allocation.hpp" #include "memory/allocation.hpp"
class G1Log : public AllStatic { class G1Log : public AllStatic {
public:
typedef enum { typedef enum {
LevelNone, LevelNone,
LevelFine, LevelFine,
...@@ -35,6 +36,7 @@ class G1Log : public AllStatic { ...@@ -35,6 +36,7 @@ class G1Log : public AllStatic {
LevelFinest LevelFinest
} LogLevel; } LogLevel;
private:
static LogLevel _level; static LogLevel _level;
public: public:
...@@ -50,6 +52,10 @@ class G1Log : public AllStatic { ...@@ -50,6 +52,10 @@ class G1Log : public AllStatic {
return _level == LevelFinest; return _level == LevelFinest;
} }
static LogLevel level() {
return _level;
}
static void init(); static void init();
}; };
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "code/icBuffer.hpp" #include "code/icBuffer.hpp"
#include "gc_implementation/g1/g1Log.hpp" #include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1MarkSweep.hpp" #include "gc_implementation/g1/g1MarkSweep.hpp"
#include "gc_implementation/g1/g1RootProcessor.hpp"
#include "gc_implementation/g1/g1StringDedup.hpp" #include "gc_implementation/g1/g1StringDedup.hpp"
#include "gc_implementation/shared/gcHeapSummary.hpp" #include "gc_implementation/shared/gcHeapSummary.hpp"
#include "gc_implementation/shared/gcTimer.hpp" #include "gc_implementation/shared/gcTimer.hpp"
...@@ -126,21 +127,22 @@ void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading, ...@@ -126,21 +127,22 @@ void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
GCTraceTime tm("phase 1", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id()); GCTraceTime tm("phase 1", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id());
GenMarkSweep::trace(" 1"); GenMarkSweep::trace(" 1");
SharedHeap* sh = SharedHeap::heap(); G1CollectedHeap* g1h = G1CollectedHeap::heap();
// Need cleared claim bits for the roots processing // Need cleared claim bits for the roots processing
ClassLoaderDataGraph::clear_claimed_marks(); ClassLoaderDataGraph::clear_claimed_marks();
MarkingCodeBlobClosure follow_code_closure(&GenMarkSweep::follow_root_closure, !CodeBlobToOopClosure::FixRelocations); MarkingCodeBlobClosure follow_code_closure(&GenMarkSweep::follow_root_closure, !CodeBlobToOopClosure::FixRelocations);
sh->process_strong_roots(true, // activate StrongRootsScope {
SharedHeap::SO_None, G1RootProcessor root_processor(g1h);
&GenMarkSweep::follow_root_closure, root_processor.process_strong_roots(&GenMarkSweep::follow_root_closure,
&GenMarkSweep::follow_cld_closure, &GenMarkSweep::follow_cld_closure,
&follow_code_closure); &follow_code_closure);
}
// Process reference objects found during marking // Process reference objects found during marking
ReferenceProcessor* rp = GenMarkSweep::ref_processor(); ReferenceProcessor* rp = GenMarkSweep::ref_processor();
assert(rp == G1CollectedHeap::heap()->ref_processor_stw(), "Sanity"); assert(rp == g1h->ref_processor_stw(), "Sanity");
rp->setup_policy(clear_all_softrefs); rp->setup_policy(clear_all_softrefs);
const ReferenceProcessorStats& stats = const ReferenceProcessorStats& stats =
...@@ -226,6 +228,12 @@ class G1AdjustPointersClosure: public HeapRegionClosure { ...@@ -226,6 +228,12 @@ class G1AdjustPointersClosure: public HeapRegionClosure {
} }
}; };
class G1AlwaysTrueClosure: public BoolObjectClosure {
public:
bool do_object_b(oop p) { return true; }
};
static G1AlwaysTrueClosure always_true;
void G1MarkSweep::mark_sweep_phase3() { void G1MarkSweep::mark_sweep_phase3() {
G1CollectedHeap* g1h = G1CollectedHeap::heap(); G1CollectedHeap* g1h = G1CollectedHeap::heap();
...@@ -233,24 +241,23 @@ void G1MarkSweep::mark_sweep_phase3() { ...@@ -233,24 +241,23 @@ void G1MarkSweep::mark_sweep_phase3() {
GCTraceTime tm("phase 3", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id()); GCTraceTime tm("phase 3", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id());
GenMarkSweep::trace("3"); GenMarkSweep::trace("3");
SharedHeap* sh = SharedHeap::heap();
// Need cleared claim bits for the roots processing // Need cleared claim bits for the roots processing
ClassLoaderDataGraph::clear_claimed_marks(); ClassLoaderDataGraph::clear_claimed_marks();
CodeBlobToOopClosure adjust_code_closure(&GenMarkSweep::adjust_pointer_closure, CodeBlobToOopClosure::FixRelocations); CodeBlobToOopClosure adjust_code_closure(&GenMarkSweep::adjust_pointer_closure, CodeBlobToOopClosure::FixRelocations);
sh->process_all_roots(true, // activate StrongRootsScope {
SharedHeap::SO_AllCodeCache, G1RootProcessor root_processor(g1h);
&GenMarkSweep::adjust_pointer_closure, root_processor.process_all_roots(&GenMarkSweep::adjust_pointer_closure,
&GenMarkSweep::adjust_cld_closure, &GenMarkSweep::adjust_cld_closure,
&adjust_code_closure); &adjust_code_closure);
}
assert(GenMarkSweep::ref_processor() == g1h->ref_processor_stw(), "Sanity"); assert(GenMarkSweep::ref_processor() == g1h->ref_processor_stw(), "Sanity");
g1h->ref_processor_stw()->weak_oops_do(&GenMarkSweep::adjust_pointer_closure); g1h->ref_processor_stw()->weak_oops_do(&GenMarkSweep::adjust_pointer_closure);
// Now adjust pointers in remaining weak roots. (All of which should // Now adjust pointers in remaining weak roots. (All of which should
// have been cleared if they pointed to non-surviving objects.) // have been cleared if they pointed to non-surviving objects.)
sh->process_weak_roots(&GenMarkSweep::adjust_pointer_closure); JNIHandles::weak_oops_do(&always_true, &GenMarkSweep::adjust_pointer_closure);
if (G1StringDedup::is_enabled()) { if (G1StringDedup::is_enabled()) {
G1StringDedup::oops_do(&GenMarkSweep::adjust_pointer_closure); G1StringDedup::oops_do(&GenMarkSweep::adjust_pointer_closure);
......
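In phase 3 above, weak JNI handles are adjusted by calling JNIHandles::weak_oops_do with a BoolObjectClosure that answers true for every object: liveness has already been decided by the time pointers are adjusted, so the "is alive" filter is deliberately a no-op and the adjust closure is applied to every remaining weak handle. A minimal sketch of that shape, using standalone stand-ins rather than the HotSpot closure types:

#include <cstddef>

struct BoolObjectClosure { virtual bool do_object_b(void* obj) = 0; virtual ~BoolObjectClosure() {} };
struct OopClosure        { virtual void do_oop(void** p) = 0;        virtual ~OopClosure() {} };

struct AlwaysTrueClosure : BoolObjectClosure {
  bool do_object_b(void*) { return true; }   // treat every object as live
};

// weak_oops_do-style walk: clear handles whose referent is dead, otherwise
// hand the slot to the adjusting closure.
void weak_oops_do(void** handles, size_t n, BoolObjectClosure* is_alive, OopClosure* f) {
  for (size_t i = 0; i < n; i++) {
    if (handles[i] == nullptr) continue;
    if (is_alive->do_object_b(handles[i])) {
      f->do_oop(&handles[i]);   // e.g. rewrite the slot to the object's new address
    } else {
      handles[i] = nullptr;     // referent died; clear the weak handle
    }
  }
}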
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#define SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP #define SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP
#include "memory/iterator.hpp" #include "memory/iterator.hpp"
#include "oops/markOop.hpp"
class HeapRegion; class HeapRegion;
class G1CollectedHeap; class G1CollectedHeap;
...@@ -239,14 +240,14 @@ class G1UpdateRSOrPushRefOopClosure: public ExtendedOopClosure { ...@@ -239,14 +240,14 @@ class G1UpdateRSOrPushRefOopClosure: public ExtendedOopClosure {
G1CollectedHeap* _g1; G1CollectedHeap* _g1;
G1RemSet* _g1_rem_set; G1RemSet* _g1_rem_set;
HeapRegion* _from; HeapRegion* _from;
OopsInHeapRegionClosure* _push_ref_cl; G1ParPushHeapRSClosure* _push_ref_cl;
bool _record_refs_into_cset; bool _record_refs_into_cset;
uint _worker_i; uint _worker_i;
public: public:
G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h, G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
G1RemSet* rs, G1RemSet* rs,
OopsInHeapRegionClosure* push_ref_cl, G1ParPushHeapRSClosure* push_ref_cl,
bool record_refs_into_cset, bool record_refs_into_cset,
uint worker_i = 0); uint worker_i = 0);
...@@ -256,7 +257,8 @@ public: ...@@ -256,7 +257,8 @@ public:
} }
bool self_forwarded(oop obj) { bool self_forwarded(oop obj) {
bool result = (obj->is_forwarded() && (obj->forwardee()== obj)); markOop m = obj->mark();
bool result = (m->is_marked() && ((oop)m->decode_pointer() == obj));
return result; return result;
} }
......
...@@ -67,8 +67,8 @@ inline void G1ParScanClosure::do_oop_nv(T* p) { ...@@ -67,8 +67,8 @@ inline void G1ParScanClosure::do_oop_nv(T* p) {
if (!oopDesc::is_null(heap_oop)) { if (!oopDesc::is_null(heap_oop)) {
oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj); const InCSetState state = _g1->in_cset_state(obj);
if (state == G1CollectedHeap::InCSet) { if (state.is_in_cset()) {
// We're not going to even bother checking whether the object is // We're not going to even bother checking whether the object is
// already forwarded or not, as this usually causes an immediate // already forwarded or not, as this usually causes an immediate
// stall. We'll try to prefetch the object (for write, given that // stall. We'll try to prefetch the object (for write, given that
...@@ -87,7 +87,7 @@ inline void G1ParScanClosure::do_oop_nv(T* p) { ...@@ -87,7 +87,7 @@ inline void G1ParScanClosure::do_oop_nv(T* p) {
_par_scan_state->push_on_queue(p); _par_scan_state->push_on_queue(p);
} else { } else {
if (state == G1CollectedHeap::IsHumongous) { if (state.is_humongous()) {
_g1->set_humongous_is_live(obj); _g1->set_humongous_is_live(obj);
} }
_par_scan_state->update_rs(_from, p, _worker_id); _par_scan_state->update_rs(_from, p, _worker_id);
......
...@@ -38,6 +38,7 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ...@@ -38,6 +38,7 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num,
_g1_rem(g1h->g1_rem_set()), _g1_rem(g1h->g1_rem_set()),
_hash_seed(17), _queue_num(queue_num), _hash_seed(17), _queue_num(queue_num),
_term_attempts(0), _term_attempts(0),
_tenuring_threshold(g1h->g1_policy()->tenuring_threshold()),
_age_table(false), _scanner(g1h, rp), _age_table(false), _scanner(g1h, rp),
_strong_roots_time(0), _term_time(0) { _strong_roots_time(0), _term_time(0) {
_scanner.set_par_scan_thread_state(this); _scanner.set_par_scan_thread_state(this);
...@@ -59,6 +60,12 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ...@@ -59,6 +60,12 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num,
_g1_par_allocator = G1ParGCAllocator::create_allocator(_g1h); _g1_par_allocator = G1ParGCAllocator::create_allocator(_g1h);
_dest[InCSetState::NotInCSet] = InCSetState::NotInCSet;
// The dest for Young is used when the objects are aged enough to
// need to be moved to the next space.
_dest[InCSetState::Young] = InCSetState::Old;
_dest[InCSetState::Old] = InCSetState::Old;
_start = os::elapsedTime(); _start = os::elapsedTime();
} }
...@@ -150,86 +157,126 @@ void G1ParScanThreadState::trim_queue() { ...@@ -150,86 +157,126 @@ void G1ParScanThreadState::trim_queue() {
} while (!_refs->is_empty()); } while (!_refs->is_empty());
} }
oop G1ParScanThreadState::copy_to_survivor_space(oop const old) { HeapWord* G1ParScanThreadState::allocate_in_next_plab(InCSetState const state,
size_t word_sz = old->size(); InCSetState* dest,
HeapRegion* from_region = _g1h->heap_region_containing_raw(old); size_t word_sz,
AllocationContext_t const context) {
assert(state.is_in_cset_or_humongous(), err_msg("Unexpected state: " CSETSTATE_FORMAT, state.value()));
assert(dest->is_in_cset_or_humongous(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value()));
// Right now we only have two types of regions (young / old) so
// let's keep the logic here simple. We can generalize it when necessary.
if (dest->is_young()) {
HeapWord* const obj_ptr = _g1_par_allocator->allocate(InCSetState::Old,
word_sz, context);
if (obj_ptr == NULL) {
return NULL;
}
// Make sure that we won't attempt to copy any other objects out
// of a survivor region (given that apparently we cannot allocate
// any new ones) to avoid coming into this slow path.
_tenuring_threshold = 0;
dest->set_old();
return obj_ptr;
} else {
assert(dest->is_old(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value()));
// no other space to try.
return NULL;
}
}
InCSetState G1ParScanThreadState::next_state(InCSetState const state, markOop const m, uint& age) {
if (state.is_young()) {
age = !m->has_displaced_mark_helper() ? m->age()
: m->displaced_mark_helper()->age();
if (age < _tenuring_threshold) {
return state;
}
}
return dest(state);
}
oop G1ParScanThreadState::copy_to_survivor_space(InCSetState const state,
oop const old,
markOop const old_mark) {
const size_t word_sz = old->size();
HeapRegion* const from_region = _g1h->heap_region_containing_raw(old);
// +1 to make the -1 indexes valid... // +1 to make the -1 indexes valid...
int young_index = from_region->young_index_in_cset()+1; const int young_index = from_region->young_index_in_cset()+1;
assert( (from_region->is_young() && young_index > 0) || assert( (from_region->is_young() && young_index > 0) ||
(!from_region->is_young() && young_index == 0), "invariant" ); (!from_region->is_young() && young_index == 0), "invariant" );
G1CollectorPolicy* g1p = _g1h->g1_policy(); const AllocationContext_t context = from_region->allocation_context();
markOop m = old->mark();
int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age() uint age = 0;
: m->age(); InCSetState dest_state = next_state(state, old_mark, age);
GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age, HeapWord* obj_ptr = _g1_par_allocator->plab_allocate(dest_state, word_sz, context);
word_sz);
AllocationContext_t context = from_region->allocation_context(); // PLAB allocations should succeed most of the time, so we'll
HeapWord* obj_ptr = _g1_par_allocator->allocate(alloc_purpose, word_sz, context); // normally check against NULL once and that's it.
#ifndef PRODUCT if (obj_ptr == NULL) {
// Should this evacuation fail? obj_ptr = _g1_par_allocator->allocate_direct_or_new_plab(dest_state, word_sz, context);
if (_g1h->evacuation_should_fail()) { if (obj_ptr == NULL) {
if (obj_ptr != NULL) { obj_ptr = allocate_in_next_plab(state, &dest_state, word_sz, context);
_g1_par_allocator->undo_allocation(alloc_purpose, obj_ptr, word_sz, context); if (obj_ptr == NULL) {
obj_ptr = NULL; // This will either forward-to-self, or detect that someone else has
// installed a forwarding pointer.
return _g1h->handle_evacuation_failure_par(this, old);
}
} }
} }
#endif // !PRODUCT
if (obj_ptr == NULL) { assert(obj_ptr != NULL, "when we get here, allocation should have succeeded");
// This will either forward-to-self, or detect that someone else has #ifndef PRODUCT
// installed a forwarding pointer. // Should this evacuation fail?
if (_g1h->evacuation_should_fail()) {
// Doing this after all the allocation attempts also tests the
// undo_allocation() method too.
_g1_par_allocator->undo_allocation(dest_state, obj_ptr, word_sz, context);
return _g1h->handle_evacuation_failure_par(this, old); return _g1h->handle_evacuation_failure_par(this, old);
} }
#endif // !PRODUCT
oop obj = oop(obj_ptr);
// We're going to allocate linearly, so might as well prefetch ahead. // We're going to allocate linearly, so might as well prefetch ahead.
Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes); Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
oop forward_ptr = old->forward_to_atomic(obj); const oop obj = oop(obj_ptr);
const oop forward_ptr = old->forward_to_atomic(obj);
if (forward_ptr == NULL) { if (forward_ptr == NULL) {
Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz); Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
// alloc_purpose is just a hint to allocate() above, recheck the type of region if (dest_state.is_young()) {
// we actually allocated from and update alloc_purpose accordingly if (age < markOopDesc::max_age) {
HeapRegion* to_region = _g1h->heap_region_containing_raw(obj_ptr); age++;
alloc_purpose = to_region->is_young() ? GCAllocForSurvived : GCAllocForTenured; }
if (old_mark->has_displaced_mark_helper()) {
if (g1p->track_object_age(alloc_purpose)) { // In this case, we have to install the mark word first,
// We could simply do obj->incr_age(). However, this causes a
// performance issue. obj->incr_age() will first check whether
// the object has a displaced mark by checking its mark word;
// getting the mark word from the new location of the object
// stalls. So, given that we already have the mark word and we
// are about to install it anyway, it's better to increase the
// age on the mark word, when the object does not have a
// displaced mark word. We're not expecting many objects to have
// a displaced marked word, so that case is not optimized
// further (it could be...) and we simply call obj->incr_age().
if (m->has_displaced_mark_helper()) {
// in this case, we have to install the mark word first,
// otherwise obj looks to be forwarded (the old mark word, // otherwise obj looks to be forwarded (the old mark word,
// which contains the forward pointer, was copied) // which contains the forward pointer, was copied)
obj->set_mark(m); obj->set_mark(old_mark);
obj->incr_age(); markOop new_mark = old_mark->displaced_mark_helper()->set_age(age);
old_mark->set_displaced_mark_helper(new_mark);
} else { } else {
m = m->incr_age(); obj->set_mark(old_mark->set_age(age));
obj->set_mark(m);
} }
age_table()->add(obj, word_sz); age_table()->add(age, word_sz);
} else { } else {
obj->set_mark(m); obj->set_mark(old_mark);
} }
if (G1StringDedup::is_enabled()) { if (G1StringDedup::is_enabled()) {
G1StringDedup::enqueue_from_evacuation(from_region->is_young(), const bool is_from_young = state.is_young();
to_region->is_young(), const bool is_to_young = dest_state.is_young();
assert(is_from_young == _g1h->heap_region_containing_raw(old)->is_young(),
"sanity");
assert(is_to_young == _g1h->heap_region_containing_raw(obj)->is_young(),
"sanity");
G1StringDedup::enqueue_from_evacuation(is_from_young,
is_to_young,
queue_num(), queue_num(),
obj); obj);
} }
size_t* surv_young_words = surviving_young_words(); size_t* const surv_young_words = surviving_young_words();
surv_young_words[young_index] += word_sz; surv_young_words[young_index] += word_sz;
if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) { if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
...@@ -240,14 +287,13 @@ oop G1ParScanThreadState::copy_to_survivor_space(oop const old) { ...@@ -240,14 +287,13 @@ oop G1ParScanThreadState::copy_to_survivor_space(oop const old) {
oop* old_p = set_partial_array_mask(old); oop* old_p = set_partial_array_mask(old);
push_on_queue(old_p); push_on_queue(old_p);
} else { } else {
// No point in using the slower heap_region_containing() method, HeapRegion* const to_region = _g1h->heap_region_containing_raw(obj_ptr);
// given that we know obj is in the heap. _scanner.set_region(to_region);
_scanner.set_region(_g1h->heap_region_containing_raw(obj));
obj->oop_iterate_backwards(&_scanner); obj->oop_iterate_backwards(&_scanner);
} }
return obj;
} else { } else {
_g1_par_allocator->undo_allocation(alloc_purpose, obj_ptr, word_sz, context); _g1_par_allocator->undo_allocation(dest_state, obj_ptr, word_sz, context);
obj = forward_ptr; return forward_ptr;
} }
return obj;
} }
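The core of copy_to_survivor_space above is the claim-by-CAS on the object's mark word: each competing GC thread allocates space, then tries forward_to_atomic; the winner copies the object and keeps its copy, while a loser undoes its allocation and uses the winner's forwardee. A simplified, self-contained sketch of that race handling with std::atomic (the object layout and the PLAB are stand-ins, not the real HotSpot structures):

#include <atomic>
#include <cstring>

struct Obj {
  std::atomic<Obj*> forwardee{nullptr};   // stand-in for the forwarding pointer kept in the mark word
  char payload[48];                       // stand-in for the object's fields
};

// plab_top/plab_end model a worker-private bump-pointer buffer of Obj slots (a PLAB stand-in).
Obj* copy_to_survivor_space(Obj* old_obj, Obj*& plab_top, Obj* plab_end) {
  if (plab_top == plab_end) {
    return nullptr;                       // allocation failure; handled by slower paths in the real code
  }
  Obj* copy = plab_top;

  Obj* expected = nullptr;
  if (old_obj->forwardee.compare_exchange_strong(expected, copy)) {
    plab_top += 1;                        // we won the race: commit the allocation...
    std::memcpy(copy->payload, old_obj->payload, sizeof old_obj->payload);   // ...and copy the contents
    return copy;
  }
  // Lost the race: leave plab_top untouched (the "undo") and use the winner's copy.
  return expected;
}

// Usage: Obj plab[64]; Obj* top = plab; copy_to_survivor_space(obj, top, plab + 64);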
...@@ -46,14 +46,16 @@ class G1ParScanThreadState : public StackObj { ...@@ -46,14 +46,16 @@ class G1ParScanThreadState : public StackObj {
G1SATBCardTableModRefBS* _ct_bs; G1SATBCardTableModRefBS* _ct_bs;
G1RemSet* _g1_rem; G1RemSet* _g1_rem;
G1ParGCAllocator* _g1_par_allocator; G1ParGCAllocator* _g1_par_allocator;
ageTable _age_table; ageTable _age_table;
InCSetState _dest[InCSetState::Num];
// Local tenuring threshold.
uint _tenuring_threshold;
G1ParScanClosure _scanner;
G1ParScanClosure _scanner; size_t _alloc_buffer_waste;
size_t _undo_waste;
size_t _alloc_buffer_waste;
size_t _undo_waste;
OopsInHeapRegionClosure* _evac_failure_cl; OopsInHeapRegionClosure* _evac_failure_cl;
...@@ -82,6 +84,14 @@ class G1ParScanThreadState : public StackObj { ...@@ -82,6 +84,14 @@ class G1ParScanThreadState : public StackObj {
DirtyCardQueue& dirty_card_queue() { return _dcq; } DirtyCardQueue& dirty_card_queue() { return _dcq; }
G1SATBCardTableModRefBS* ctbs() { return _ct_bs; } G1SATBCardTableModRefBS* ctbs() { return _ct_bs; }
InCSetState dest(InCSetState original) const {
assert(original.is_valid(),
err_msg("Original state invalid: " CSETSTATE_FORMAT, original.value()));
assert(_dest[original.value()].is_valid_gen(),
err_msg("Dest state is invalid: " CSETSTATE_FORMAT, _dest[original.value()].value()));
return _dest[original.value()];
}
public: public:
G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp); G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp);
~G1ParScanThreadState(); ~G1ParScanThreadState();
...@@ -112,7 +122,6 @@ class G1ParScanThreadState : public StackObj { ...@@ -112,7 +122,6 @@ class G1ParScanThreadState : public StackObj {
} }
} }
} }
public:
void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) { void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) {
_evac_failure_cl = evac_failure_cl; _evac_failure_cl = evac_failure_cl;
...@@ -193,9 +202,20 @@ class G1ParScanThreadState : public StackObj { ...@@ -193,9 +202,20 @@ class G1ParScanThreadState : public StackObj {
template <class T> inline void deal_with_reference(T* ref_to_scan); template <class T> inline void deal_with_reference(T* ref_to_scan);
inline void dispatch_reference(StarTask ref); inline void dispatch_reference(StarTask ref);
// Tries to allocate word_sz in the PLAB of the next "generation" after trying to
// allocate into dest. State is the original (source) cset state for the object
// that is allocated for.
// Returns a non-NULL pointer if successful, and updates dest if required.
HeapWord* allocate_in_next_plab(InCSetState const state,
InCSetState* dest,
size_t word_sz,
AllocationContext_t const context);
inline InCSetState next_state(InCSetState const state, markOop const m, uint& age);
public: public:
oop copy_to_survivor_space(oop const obj); oop copy_to_survivor_space(InCSetState const state, oop const obj, markOop const old_mark);
void trim_queue(); void trim_queue();
......
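The new _dest[] table and next_state() above express the evacuation policy as a tiny source-state to destination-state map: a young object below the tenuring threshold stays young, anything else follows the table (young ages into old, old stays old). A compact sketch of that lookup, assuming the state values from the new g1InCSetState.hpp:

#include <cassert>
#include <stdint.h>

enum State : int8_t { NotInCSet = 0, Young = 1, Old = 2, Num = 3 };

struct EvacPolicy {
  State    dest[Num];
  unsigned tenuring_threshold;

  explicit EvacPolicy(unsigned threshold) : tenuring_threshold(threshold) {
    dest[NotInCSet] = NotInCSet;
    dest[Young]     = Old;   // where a young object goes once it has aged out
    dest[Old]       = Old;   // old objects always stay old
  }

  // Mirrors next_state(): young objects below the threshold stay young,
  // everything else follows the dest table.
  State destination_for(State src, unsigned age) const {
    assert(src == Young || src == Old);
    if (src == Young && age < tenuring_threshold) return Young;
    return dest[src];
  }
};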
...@@ -38,20 +38,21 @@ template <class T> void G1ParScanThreadState::do_oop_evac(T* p, HeapRegion* from ...@@ -38,20 +38,21 @@ template <class T> void G1ParScanThreadState::do_oop_evac(T* p, HeapRegion* from
// set, due to (benign) races in the claim mechanism during RSet scanning more // set, due to (benign) races in the claim mechanism during RSet scanning more
// than one thread might claim the same card. So the same card may be // than one thread might claim the same card. So the same card may be
// processed multiple times. So redo this check. // processed multiple times. So redo this check.
G1CollectedHeap::in_cset_state_t in_cset_state = _g1h->in_cset_state(obj); const InCSetState in_cset_state = _g1h->in_cset_state(obj);
if (in_cset_state == G1CollectedHeap::InCSet) { if (in_cset_state.is_in_cset()) {
oop forwardee; oop forwardee;
if (obj->is_forwarded()) { markOop m = obj->mark();
forwardee = obj->forwardee(); if (m->is_marked()) {
forwardee = (oop) m->decode_pointer();
} else { } else {
forwardee = copy_to_survivor_space(obj); forwardee = copy_to_survivor_space(in_cset_state, obj, m);
} }
oopDesc::encode_store_heap_oop(p, forwardee); oopDesc::encode_store_heap_oop(p, forwardee);
} else if (in_cset_state == G1CollectedHeap::IsHumongous) { } else if (in_cset_state.is_humongous()) {
_g1h->set_humongous_is_live(obj); _g1h->set_humongous_is_live(obj);
} else { } else {
assert(in_cset_state == G1CollectedHeap::InNeither, assert(!in_cset_state.is_in_cset_or_humongous(),
err_msg("In_cset_state must be InNeither here, but is %d", in_cset_state)); err_msg("In_cset_state must be NotInCSet here, but is " CSETSTATE_FORMAT, in_cset_state.value()));
} }
assert(obj != NULL, "Must be"); assert(obj != NULL, "Must be");
......
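do_oop_evac now loads the object's mark word once: if it already encodes a forwarding pointer, the forwardee is decoded straight from that value; otherwise the very same mark is handed to copy_to_survivor_space, which needs it anyway for age handling. A rough sketch of the single-load pattern (the bit encoding below is an assumption for illustration, not HotSpot's exact markOop layout):

#include <stdint.h>

struct ObjHeader { volatile uintptr_t mark; };

// Assumed encoding for this sketch: low two bits set means "forwarded",
// remaining bits hold the forwarding pointer.
inline bool       is_forwarded(uintptr_t m) { return (m & 3u) == 3u; }
inline ObjHeader* forwardee(uintptr_t m)    { return (ObjHeader*)(m & ~(uintptr_t)3u); }

ObjHeader* resolve_forwardee(ObjHeader* obj) {
  uintptr_t m = obj->mark;       // read the mark word once
  if (is_forwarded(m)) {
    return forwardee(m);         // decode from the value we already loaded
  }
  // Not forwarded: the real code passes this same mark value on to
  // copy_to_survivor_space(state, obj, m) instead of re-reading it.
  return obj;
}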
...@@ -78,9 +78,8 @@ G1RemSet::G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) ...@@ -78,9 +78,8 @@ G1RemSet::G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
_cards_scanned(NULL), _total_cards_scanned(0), _cards_scanned(NULL), _total_cards_scanned(0),
_prev_period_summary() _prev_period_summary()
{ {
_seq_task = new SubTasksDone(NumSeqTasks);
guarantee(n_workers() > 0, "There should be some workers"); guarantee(n_workers() > 0, "There should be some workers");
_cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers(), mtGC); _cset_rs_update_cl = NEW_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, n_workers(), mtGC);
for (uint i = 0; i < n_workers(); i++) { for (uint i = 0; i < n_workers(); i++) {
_cset_rs_update_cl[i] = NULL; _cset_rs_update_cl[i] = NULL;
} }
...@@ -90,11 +89,10 @@ G1RemSet::G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) ...@@ -90,11 +89,10 @@ G1RemSet::G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
} }
G1RemSet::~G1RemSet() { G1RemSet::~G1RemSet() {
delete _seq_task;
for (uint i = 0; i < n_workers(); i++) { for (uint i = 0; i < n_workers(); i++) {
assert(_cset_rs_update_cl[i] == NULL, "it should be"); assert(_cset_rs_update_cl[i] == NULL, "it should be");
} }
FREE_C_HEAP_ARRAY(OopsInHeapRegionClosure*, _cset_rs_update_cl, mtGC); FREE_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, _cset_rs_update_cl, mtGC);
} }
void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) { void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
...@@ -108,7 +106,7 @@ class ScanRSClosure : public HeapRegionClosure { ...@@ -108,7 +106,7 @@ class ScanRSClosure : public HeapRegionClosure {
size_t _cards_done, _cards; size_t _cards_done, _cards;
G1CollectedHeap* _g1h; G1CollectedHeap* _g1h;
OopsInHeapRegionClosure* _oc; G1ParPushHeapRSClosure* _oc;
CodeBlobClosure* _code_root_cl; CodeBlobClosure* _code_root_cl;
G1BlockOffsetSharedArray* _bot_shared; G1BlockOffsetSharedArray* _bot_shared;
...@@ -120,7 +118,7 @@ class ScanRSClosure : public HeapRegionClosure { ...@@ -120,7 +118,7 @@ class ScanRSClosure : public HeapRegionClosure {
bool _try_claimed; bool _try_claimed;
public: public:
ScanRSClosure(OopsInHeapRegionClosure* oc, ScanRSClosure(G1ParPushHeapRSClosure* oc,
CodeBlobClosure* code_root_cl, CodeBlobClosure* code_root_cl,
uint worker_i) : uint worker_i) :
_oc(oc), _oc(oc),
...@@ -142,16 +140,13 @@ public: ...@@ -142,16 +140,13 @@ public:
void scanCard(size_t index, HeapRegion *r) { void scanCard(size_t index, HeapRegion *r) {
// Stack allocate the DirtyCardToOopClosure instance // Stack allocate the DirtyCardToOopClosure instance
HeapRegionDCTOC cl(_g1h, r, _oc, HeapRegionDCTOC cl(_g1h, r, _oc,
CardTableModRefBS::Precise, CardTableModRefBS::Precise);
HeapRegionDCTOC::IntoCSFilterKind);
// Set the "from" region in the closure. // Set the "from" region in the closure.
_oc->set_region(r); _oc->set_region(r);
HeapWord* card_start = _bot_shared->address_for_index(index); MemRegion card_region(_bot_shared->address_for_index(index), G1BlockOffsetSharedArray::N_words);
HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words; MemRegion pre_gc_allocated(r->bottom(), r->scan_top());
Space *sp = SharedHeap::heap()->space_containing(card_start); MemRegion mr = pre_gc_allocated.intersection(card_region);
MemRegion sm_region = sp->used_region_at_save_marks();
MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end));
if (!mr.is_empty() && !_ct_bs->is_card_claimed(index)) { if (!mr.is_empty() && !_ct_bs->is_card_claimed(index)) {
// We make the card as "claimed" lazily (so races are possible // We make the card as "claimed" lazily (so races are possible
// but they're benign), which reduces the number of duplicate // but they're benign), which reduces the number of duplicate
...@@ -240,7 +235,7 @@ public: ...@@ -240,7 +235,7 @@ public:
size_t cards_looked_up() { return _cards;} size_t cards_looked_up() { return _cards;}
}; };
void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, void G1RemSet::scanRS(G1ParPushHeapRSClosure* oc,
CodeBlobClosure* code_root_cl, CodeBlobClosure* code_root_cl,
uint worker_i) { uint worker_i) {
double rs_time_start = os::elapsedTime(); double rs_time_start = os::elapsedTime();
...@@ -258,9 +253,8 @@ void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, ...@@ -258,9 +253,8 @@ void G1RemSet::scanRS(OopsInHeapRegionClosure* oc,
assert(_cards_scanned != NULL, "invariant"); assert(_cards_scanned != NULL, "invariant");
_cards_scanned[worker_i] = scanRScl.cards_done(); _cards_scanned[worker_i] = scanRScl.cards_done();
_g1p->phase_times()->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
_g1p->phase_times()->record_strong_code_root_scan_time(worker_i, _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, scanRScl.strong_code_root_scan_time_sec());
scanRScl.strong_code_root_scan_time_sec() * 1000.0);
} }
// Closure used for updating RSets and recording references that // Closure used for updating RSets and recording references that
...@@ -297,29 +291,18 @@ public: ...@@ -297,29 +291,18 @@ public:
}; };
void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i) { void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i) {
double start = os::elapsedTime(); G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i);
// Apply the given closure to all remaining log entries. // Apply the given closure to all remaining log entries.
RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq); RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);
_g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i); _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i);
// Now there should be no dirty cards.
if (G1RSLogCheckCardTable) {
CountNonCleanMemRegionClosure cl(_g1);
_ct_bs->mod_card_iterate(&cl);
// XXX This isn't true any more: keeping cards of young regions
// marked dirty broke it. Need some reasonable fix.
guarantee(cl.n() == 0, "Card table should be clean.");
}
_g1p->phase_times()->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
} }
void G1RemSet::cleanupHRRS() { void G1RemSet::cleanupHRRS() {
HeapRegionRemSet::cleanup(); HeapRegionRemSet::cleanup();
} }
void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, void G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* oc,
CodeBlobClosure* code_root_cl, CodeBlobClosure* code_root_cl,
uint worker_i) { uint worker_i) {
#if CARD_REPEAT_HISTO #if CARD_REPEAT_HISTO
...@@ -344,23 +327,8 @@ void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, ...@@ -344,23 +327,8 @@ void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
assert((ParallelGCThreads > 0) || worker_i == 0, "invariant"); assert((ParallelGCThreads > 0) || worker_i == 0, "invariant");
// The two flags below were introduced temporarily to serialize updateRS(&into_cset_dcq, worker_i);
// the updating and scanning of remembered sets. There are some scanRS(oc, code_root_cl, worker_i);
// race conditions when these two operations are done in parallel
// and they are causing failures. When we resolve said race
// conditions, we'll revert back to parallel remembered set
// updating and scanning. See CRs 6677707 and 6677708.
if (G1UseParallelRSetUpdating || (worker_i == 0)) {
updateRS(&into_cset_dcq, worker_i);
} else {
_g1p->phase_times()->record_update_rs_processed_buffers(worker_i, 0);
_g1p->phase_times()->record_update_rs_time(worker_i, 0.0);
}
if (G1UseParallelRSetScanning || (worker_i == 0)) {
scanRS(oc, code_root_cl, worker_i);
} else {
_g1p->phase_times()->record_scan_rs_time(worker_i, 0.0);
}
// We now clear the cached values of _cset_rs_update_cl for this worker // We now clear the cached values of _cset_rs_update_cl for this worker
_cset_rs_update_cl[worker_i] = NULL; _cset_rs_update_cl[worker_i] = NULL;
...@@ -461,7 +429,7 @@ G1Mux2Closure::G1Mux2Closure(OopClosure *c1, OopClosure *c2) : ...@@ -461,7 +429,7 @@ G1Mux2Closure::G1Mux2Closure(OopClosure *c1, OopClosure *c2) :
G1UpdateRSOrPushRefOopClosure:: G1UpdateRSOrPushRefOopClosure::
G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h, G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
G1RemSet* rs, G1RemSet* rs,
OopsInHeapRegionClosure* push_ref_cl, G1ParPushHeapRSClosure* push_ref_cl,
bool record_refs_into_cset, bool record_refs_into_cset,
uint worker_i) : uint worker_i) :
_g1(g1h), _g1_rem_set(rs), _from(NULL), _g1(g1h), _g1_rem_set(rs), _from(NULL),
...@@ -562,7 +530,7 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, ...@@ -562,7 +530,7 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i,
ct_freq_note_card(_ct_bs->index_for(start)); ct_freq_note_card(_ct_bs->index_for(start));
#endif #endif
OopsInHeapRegionClosure* oops_in_heap_closure = NULL; G1ParPushHeapRSClosure* oops_in_heap_closure = NULL;
if (check_for_refs_into_cset) { if (check_for_refs_into_cset) {
// ConcurrentG1RefineThreads have worker numbers larger than what // ConcurrentG1RefineThreads have worker numbers larger than what
// _cset_rs_update_cl[] is set up to handle. But those threads should // _cset_rs_update_cl[] is set up to handle. But those threads should
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
class G1CollectedHeap; class G1CollectedHeap;
class CardTableModRefBarrierSet; class CardTableModRefBarrierSet;
class ConcurrentG1Refine; class ConcurrentG1Refine;
class G1ParPushHeapRSClosure;
// A G1RemSet in which each heap region has a rem set that records the // A G1RemSet in which each heap region has a rem set that records the
// external heap references into it. Uses a mod ref bs to track updates, // external heap references into it. Uses a mod ref bs to track updates,
...@@ -58,7 +59,6 @@ protected: ...@@ -58,7 +59,6 @@ protected:
}; };
CardTableModRefBS* _ct_bs; CardTableModRefBS* _ct_bs;
SubTasksDone* _seq_task;
G1CollectorPolicy* _g1p; G1CollectorPolicy* _g1p;
ConcurrentG1Refine* _cg1r; ConcurrentG1Refine* _cg1r;
...@@ -68,7 +68,7 @@ protected: ...@@ -68,7 +68,7 @@ protected:
// Used for caching the closure that is responsible for scanning // Used for caching the closure that is responsible for scanning
// references into the collection set. // references into the collection set.
OopsInHeapRegionClosure** _cset_rs_update_cl; G1ParPushHeapRSClosure** _cset_rs_update_cl;
// Print the given summary info // Print the given summary info
virtual void print_summary_info(G1RemSetSummary * summary, const char * header = NULL); virtual void print_summary_info(G1RemSetSummary * summary, const char * header = NULL);
...@@ -95,7 +95,7 @@ public: ...@@ -95,7 +95,7 @@ public:
// partitioning the work to be done. It should be the same as // partitioning the work to be done. It should be the same as
// the "i" passed to the calling thread's work(i) function. // the "i" passed to the calling thread's work(i) function.
// In the sequential case this param will be ignored. // In the sequential case this param will be ignored.
void oops_into_collection_set_do(OopsInHeapRegionClosure* blk, void oops_into_collection_set_do(G1ParPushHeapRSClosure* blk,
CodeBlobClosure* code_root_cl, CodeBlobClosure* code_root_cl,
uint worker_i); uint worker_i);
...@@ -107,7 +107,7 @@ public: ...@@ -107,7 +107,7 @@ public:
void prepare_for_oops_into_collection_set_do(); void prepare_for_oops_into_collection_set_do();
void cleanup_after_oops_into_collection_set_do(); void cleanup_after_oops_into_collection_set_do();
void scanRS(OopsInHeapRegionClosure* oc, void scanRS(G1ParPushHeapRSClosure* oc,
CodeBlobClosure* code_root_cl, CodeBlobClosure* code_root_cl,
uint worker_i); uint worker_i);
......
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "classfile/systemDictionary.hpp"
#include "code/codeCache.hpp"
#include "gc_implementation/g1/bufferingOopClosure.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
#include "gc_implementation/g1/g1RemSet.inline.hpp"
#include "gc_implementation/g1/g1RootProcessor.hpp"
#include "memory/allocation.inline.hpp"
#include "runtime/fprofiler.hpp"
#include "runtime/mutex.hpp"
#include "services/management.hpp"
class G1CodeBlobClosure : public CodeBlobClosure {
class HeapRegionGatheringOopClosure : public OopClosure {
G1CollectedHeap* _g1h;
OopClosure* _work;
nmethod* _nm;
template <typename T>
void do_oop_work(T* p) {
_work->do_oop(p);
T oop_or_narrowoop = oopDesc::load_heap_oop(p);
if (!oopDesc::is_null(oop_or_narrowoop)) {
oop o = oopDesc::decode_heap_oop_not_null(oop_or_narrowoop);
HeapRegion* hr = _g1h->heap_region_containing_raw(o);
assert(!_g1h->obj_in_cs(o) || hr->rem_set()->strong_code_roots_list_contains(_nm), "if o still in CS then evacuation failed and nm must already be in the remset");
hr->add_strong_code_root(_nm);
}
}
public:
HeapRegionGatheringOopClosure(OopClosure* oc) : _g1h(G1CollectedHeap::heap()), _work(oc), _nm(NULL) {}
void do_oop(oop* o) {
do_oop_work(o);
}
void do_oop(narrowOop* o) {
do_oop_work(o);
}
void set_nm(nmethod* nm) {
_nm = nm;
}
};
HeapRegionGatheringOopClosure _oc;
public:
G1CodeBlobClosure(OopClosure* oc) : _oc(oc) {}
void do_code_blob(CodeBlob* cb) {
nmethod* nm = cb->as_nmethod_or_null();
if (nm != NULL) {
if (!nm->test_set_oops_do_mark()) {
_oc.set_nm(nm);
nm->oops_do(&_oc);
nm->fix_oop_relocations();
}
}
}
};
void G1RootProcessor::worker_has_discovered_all_strong_classes() {
uint n_workers = _g1h->n_par_threads();
assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
uint new_value = (uint)Atomic::add(1, &_n_workers_discovered_strong_classes);
if (new_value == n_workers) {
// This thread is last. Notify the others.
MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag);
_lock.notify_all();
}
}
void G1RootProcessor::wait_until_all_strong_classes_discovered() {
uint n_workers = _g1h->n_par_threads();
assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
if ((uint)_n_workers_discovered_strong_classes != n_workers) {
MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag);
while ((uint)_n_workers_discovered_strong_classes != n_workers) {
_lock.wait(Mutex::_no_safepoint_check_flag, 0, false);
}
}
}
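
The two methods above implement a "last worker notifies the rest" barrier using Atomic::add and a Monitor. A self-contained sketch of the same idea with standard C++ primitives follows; StrongClassBarrier, the worker count and the use of std::condition_variable are assumptions made for the example, not HotSpot code:

#include <atomic>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

class StrongClassBarrier {
  const unsigned _n_workers;
  std::atomic<unsigned> _discovered{0};
  std::mutex _lock;
  std::condition_variable _cv;
public:
  explicit StrongClassBarrier(unsigned n) : _n_workers(n) {}

  void worker_has_discovered_all_strong_classes() {
    unsigned new_value = _discovered.fetch_add(1) + 1;
    if (new_value == _n_workers) {
      std::lock_guard<std::mutex> g(_lock);
      _cv.notify_all();                 // this thread is last: wake the waiters
    }
  }

  void wait_until_all_strong_classes_discovered() {
    std::unique_lock<std::mutex> g(_lock);
    _cv.wait(g, [this] { return _discovered.load() == _n_workers; });
  }
};

int main() {
  const unsigned n = 4;
  StrongClassBarrier barrier(n);
  std::vector<std::thread> workers;
  for (unsigned i = 0; i < n; i++) {
    workers.emplace_back([&barrier, i] {
      // ... scan strong CLDs/nmethods for this worker ...
      barrier.worker_has_discovered_all_strong_classes();
      barrier.wait_until_all_strong_classes_discovered();
      std::printf("worker %u passed the barrier\n", i);
    });
  }
  for (auto& t : workers) t.join();
}
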
G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h) :
_g1h(g1h),
_process_strong_tasks(new SubTasksDone(G1RP_PS_NumElements)),
_srs(g1h),
_lock(Mutex::leaf, "G1 Root Scanning barrier lock", false),
_n_workers_discovered_strong_classes(0) {}
void G1RootProcessor::evacuate_roots(OopClosure* scan_non_heap_roots,
OopClosure* scan_non_heap_weak_roots,
CLDClosure* scan_strong_clds,
CLDClosure* scan_weak_clds,
bool trace_metadata,
uint worker_i) {
// First scan the shared roots.
double ext_roots_start = os::elapsedTime();
G1GCPhaseTimes* phase_times = _g1h->g1_policy()->phase_times();
BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots);
OopClosure* const weak_roots = &buf_scan_non_heap_weak_roots;
OopClosure* const strong_roots = &buf_scan_non_heap_roots;
// CodeBlobClosures are not interoperable with BufferingOopClosures
G1CodeBlobClosure root_code_blobs(scan_non_heap_roots);
process_java_roots(strong_roots,
trace_metadata ? scan_strong_clds : NULL,
scan_strong_clds,
trace_metadata ? NULL : scan_weak_clds,
&root_code_blobs,
phase_times,
worker_i);
// This is the point where this worker thread will not find more strong CLDs/nmethods.
// Report this so G1 can synchronize the strong and weak CLDs/nmethods processing.
if (trace_metadata) {
worker_has_discovered_all_strong_classes();
}
process_vm_roots(strong_roots, weak_roots, phase_times, worker_i);
{
// Now the CM ref_processor roots.
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CMRefRoots, worker_i);
if (!_process_strong_tasks->is_task_claimed(G1RP_PS_refProcessor_oops_do)) {
// We need to treat the discovered reference lists of the
// concurrent mark ref processor as roots and keep entries
// (which are added by the marking threads) on them live
// until they can be processed at the end of marking.
_g1h->ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots);
}
}
if (trace_metadata) {
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::WaitForStrongCLD, worker_i);
// Barrier to make sure all workers passed
// the strong CLD and strong nmethods phases.
wait_until_all_strong_classes_discovered();
}
// Now take the complement of the strong CLDs.
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::WeakCLDRoots, worker_i);
ClassLoaderDataGraph::roots_cld_do(NULL, scan_weak_clds);
} else {
phase_times->record_time_secs(G1GCPhaseTimes::WaitForStrongCLD, worker_i, 0.0);
phase_times->record_time_secs(G1GCPhaseTimes::WeakCLDRoots, worker_i, 0.0);
}
// Finish up any enqueued closure apps (attributed as object copy time).
buf_scan_non_heap_roots.done();
buf_scan_non_heap_weak_roots.done();
double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds()
+ buf_scan_non_heap_weak_roots.closure_app_seconds();
phase_times->record_time_secs(G1GCPhaseTimes::ObjCopy, worker_i, obj_copy_time_sec);
double ext_root_time_sec = os::elapsedTime() - ext_roots_start - obj_copy_time_sec;
phase_times->record_time_secs(G1GCPhaseTimes::ExtRootScan, worker_i, ext_root_time_sec);
// During conc marking we have to filter the per-thread SATB buffers
// to make sure we remove any oops into the CSet (which will show up
// as implicitly live).
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::SATBFiltering, worker_i);
if (!_process_strong_tasks->is_task_claimed(G1RP_PS_filter_satb_buffers) && _g1h->mark_in_progress()) {
JavaThread::satb_mark_queue_set().filter_thread_buffers();
}
}
_process_strong_tasks->all_tasks_completed();
}
void G1RootProcessor::process_strong_roots(OopClosure* oops,
CLDClosure* clds,
CodeBlobClosure* blobs) {
process_java_roots(oops, clds, clds, NULL, blobs, NULL, 0);
process_vm_roots(oops, NULL, NULL, 0);
_process_strong_tasks->all_tasks_completed();
}
void G1RootProcessor::process_all_roots(OopClosure* oops,
CLDClosure* clds,
CodeBlobClosure* blobs) {
process_java_roots(oops, NULL, clds, clds, NULL, NULL, 0);
process_vm_roots(oops, oops, NULL, 0);
if (!_process_strong_tasks->is_task_claimed(G1RP_PS_CodeCache_oops_do)) {
CodeCache::blobs_do(blobs);
}
_process_strong_tasks->all_tasks_completed();
}
void G1RootProcessor::process_java_roots(OopClosure* strong_roots,
CLDClosure* thread_stack_clds,
CLDClosure* strong_clds,
CLDClosure* weak_clds,
CodeBlobClosure* strong_code,
G1GCPhaseTimes* phase_times,
uint worker_i) {
assert(thread_stack_clds == NULL || weak_clds == NULL, "There is overlap between those, only one may be set");
// Iterating over the CLDG and the Threads is done early to allow us to
// first process the strong CLDs and nmethods and then, after a barrier,
// let the thread process the weak CLDs and nmethods.
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CLDGRoots, worker_i);
if (!_process_strong_tasks->is_task_claimed(G1RP_PS_ClassLoaderDataGraph_oops_do)) {
ClassLoaderDataGraph::roots_cld_do(strong_clds, weak_clds);
}
}
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ThreadRoots, worker_i);
Threads::possibly_parallel_oops_do(strong_roots, thread_stack_clds, strong_code);
}
}
void G1RootProcessor::process_vm_roots(OopClosure* strong_roots,
OopClosure* weak_roots,
G1GCPhaseTimes* phase_times,
uint worker_i) {
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::UniverseRoots, worker_i);
if (!_process_strong_tasks->is_task_claimed(G1RP_PS_Universe_oops_do)) {
Universe::oops_do(strong_roots);
}
}
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::JNIRoots, worker_i);
if (!_process_strong_tasks->is_task_claimed(G1RP_PS_JNIHandles_oops_do)) {
JNIHandles::oops_do(strong_roots);
}
}
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ObjectSynchronizerRoots, worker_i);
if (!_process_strong_tasks->is_task_claimed(G1RP_PS_ObjectSynchronizer_oops_do)) {
ObjectSynchronizer::oops_do(strong_roots);
}
}
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::FlatProfilerRoots, worker_i);
if (!_process_strong_tasks->is_task_claimed(G1RP_PS_FlatProfiler_oops_do)) {
FlatProfiler::oops_do(strong_roots);
}
}
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ManagementRoots, worker_i);
if (!_process_strong_tasks->is_task_claimed(G1RP_PS_Management_oops_do)) {
Management::oops_do(strong_roots);
}
}
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::JVMTIRoots, worker_i);
if (!_process_strong_tasks->is_task_claimed(G1RP_PS_jvmti_oops_do)) {
JvmtiExport::oops_do(strong_roots);
}
}
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::SystemDictionaryRoots, worker_i);
if (!_process_strong_tasks->is_task_claimed(G1RP_PS_SystemDictionary_oops_do)) {
SystemDictionary::roots_oops_do(strong_roots, weak_roots);
}
}
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::StringTableRoots, worker_i);
// All threads execute the following. A specific chunk of buckets
// from the StringTable are the individual tasks.
if (weak_roots != NULL) {
StringTable::possibly_parallel_oops_do(weak_roots);
}
}
}
void G1RootProcessor::scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs,
OopClosure* scan_non_heap_weak_roots,
uint worker_i) {
G1GCPhaseTimes* phase_times = _g1h->g1_policy()->phase_times();
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CodeCacheRoots, worker_i);
// Now scan the complement of the collection set.
G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots);
_g1h->g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i);
}
void G1RootProcessor::set_num_workers(int active_workers) {
_process_strong_tasks->set_n_threads(active_workers);
}
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
#define SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
#include "memory/allocation.hpp"
#include "memory/sharedHeap.hpp"
#include "runtime/mutex.hpp"
class CLDClosure;
class CodeBlobClosure;
class G1CollectedHeap;
class G1GCPhaseTimes;
class G1ParPushHeapRSClosure;
class Monitor;
class OopClosure;
class SubTasksDone;
// Scoped object to assist in applying oop, CLD and code blob closures to
// root locations. Handles claiming of different root scanning tasks
// and takes care of global state for root scanning via a StrongRootsScope.
// In the parallel case there is a shared G1RootProcessor object where all
// worker threads call the process_roots methods.
class G1RootProcessor : public StackObj {
G1CollectedHeap* _g1h;
SubTasksDone* _process_strong_tasks;
SharedHeap::StrongRootsScope _srs;
// Used to implement the Thread work barrier.
Monitor _lock;
volatile jint _n_workers_discovered_strong_classes;
enum G1H_process_roots_tasks {
G1RP_PS_Universe_oops_do,
G1RP_PS_JNIHandles_oops_do,
G1RP_PS_ObjectSynchronizer_oops_do,
G1RP_PS_FlatProfiler_oops_do,
G1RP_PS_Management_oops_do,
G1RP_PS_SystemDictionary_oops_do,
G1RP_PS_ClassLoaderDataGraph_oops_do,
G1RP_PS_jvmti_oops_do,
G1RP_PS_CodeCache_oops_do,
G1RP_PS_filter_satb_buffers,
G1RP_PS_refProcessor_oops_do,
// Leave this one last.
G1RP_PS_NumElements
};
void worker_has_discovered_all_strong_classes();
void wait_until_all_strong_classes_discovered();
void process_java_roots(OopClosure* scan_non_heap_roots,
CLDClosure* thread_stack_clds,
CLDClosure* scan_strong_clds,
CLDClosure* scan_weak_clds,
CodeBlobClosure* scan_strong_code,
G1GCPhaseTimes* phase_times,
uint worker_i);
void process_vm_roots(OopClosure* scan_non_heap_roots,
OopClosure* scan_non_heap_weak_roots,
G1GCPhaseTimes* phase_times,
uint worker_i);
public:
G1RootProcessor(G1CollectedHeap* g1h);
// Apply closures to the strongly and weakly reachable roots in the system
// in a single pass.
// Record and report timing measurements for sub phases using the given worker_i.
void evacuate_roots(OopClosure* scan_non_heap_roots,
OopClosure* scan_non_heap_weak_roots,
CLDClosure* scan_strong_clds,
CLDClosure* scan_weak_clds,
bool trace_metadata,
uint worker_i);
// Apply oops, clds and blobs to all strongly reachable roots in the system
void process_strong_roots(OopClosure* oops,
CLDClosure* clds,
CodeBlobClosure* blobs);
// Apply oops, clds and blobs to strongly and weakly reachable roots in the system
void process_all_roots(OopClosure* oops,
CLDClosure* clds,
CodeBlobClosure* blobs);
// Apply scan_rs to all locations in the union of the remembered sets for all
// regions in the collection set
// (having done "set_region" to indicate the region in which the root resides),
void scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs,
OopClosure* scan_non_heap_weak_roots,
uint worker_i);
// Inform the root processor about the number of worker threads
void set_num_workers(int active_workers);
};
#endif // SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
...@@ -105,7 +105,7 @@ void G1StringDedup::deduplicate(oop java_string) { ...@@ -105,7 +105,7 @@ void G1StringDedup::deduplicate(oop java_string) {
void G1StringDedup::oops_do(OopClosure* keep_alive) { void G1StringDedup::oops_do(OopClosure* keep_alive) {
assert(is_enabled(), "String deduplication not enabled"); assert(is_enabled(), "String deduplication not enabled");
unlink_or_oops_do(NULL, keep_alive); unlink_or_oops_do(NULL, keep_alive, true /* allow_resize_and_rehash */);
} }
void G1StringDedup::unlink(BoolObjectClosure* is_alive) { void G1StringDedup::unlink(BoolObjectClosure* is_alive) {
...@@ -122,37 +122,35 @@ void G1StringDedup::unlink(BoolObjectClosure* is_alive) { ...@@ -122,37 +122,35 @@ void G1StringDedup::unlink(BoolObjectClosure* is_alive) {
class G1StringDedupUnlinkOrOopsDoTask : public AbstractGangTask { class G1StringDedupUnlinkOrOopsDoTask : public AbstractGangTask {
private: private:
G1StringDedupUnlinkOrOopsDoClosure _cl; G1StringDedupUnlinkOrOopsDoClosure _cl;
G1GCPhaseTimes* _phase_times;
public: public:
G1StringDedupUnlinkOrOopsDoTask(BoolObjectClosure* is_alive, G1StringDedupUnlinkOrOopsDoTask(BoolObjectClosure* is_alive,
OopClosure* keep_alive, OopClosure* keep_alive,
bool allow_resize_and_rehash) : bool allow_resize_and_rehash,
G1GCPhaseTimes* phase_times) :
AbstractGangTask("G1StringDedupUnlinkOrOopsDoTask"), AbstractGangTask("G1StringDedupUnlinkOrOopsDoTask"),
_cl(is_alive, keep_alive, allow_resize_and_rehash) { _cl(is_alive, keep_alive, allow_resize_and_rehash), _phase_times(phase_times) { }
}
virtual void work(uint worker_id) { virtual void work(uint worker_id) {
double queue_fixup_start = os::elapsedTime(); {
G1StringDedupQueue::unlink_or_oops_do(&_cl); G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupQueueFixup, worker_id);
G1StringDedupQueue::unlink_or_oops_do(&_cl);
double table_fixup_start = os::elapsedTime(); }
G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id); {
G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupTableFixup, worker_id);
double queue_fixup_time_ms = (table_fixup_start - queue_fixup_start) * 1000.0; G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id);
double table_fixup_time_ms = (os::elapsedTime() - table_fixup_start) * 1000.0; }
G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
g1p->phase_times()->record_string_dedup_queue_fixup_worker_time(worker_id, queue_fixup_time_ms);
g1p->phase_times()->record_string_dedup_table_fixup_worker_time(worker_id, table_fixup_time_ms);
} }
}; };
void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, bool allow_resize_and_rehash) { void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive,
OopClosure* keep_alive,
bool allow_resize_and_rehash,
G1GCPhaseTimes* phase_times) {
assert(is_enabled(), "String deduplication not enabled"); assert(is_enabled(), "String deduplication not enabled");
G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
g1p->phase_times()->note_string_dedup_fixup_start();
double fixup_start = os::elapsedTime();
G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash); G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash, phase_times);
if (G1CollectedHeap::use_parallel_gc_threads()) { if (G1CollectedHeap::use_parallel_gc_threads()) {
G1CollectedHeap* g1h = G1CollectedHeap::heap(); G1CollectedHeap* g1h = G1CollectedHeap::heap();
g1h->set_par_threads(); g1h->set_par_threads();
...@@ -161,10 +159,6 @@ void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* k ...@@ -161,10 +159,6 @@ void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* k
} else { } else {
task.work(0); task.work(0);
} }
double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
g1p->phase_times()->record_string_dedup_fixup_time(fixup_time_ms);
g1p->phase_times()->note_string_dedup_fixup_end();
} }
void G1StringDedup::threads_do(ThreadClosure* tc) { void G1StringDedup::threads_do(ThreadClosure* tc) {
......
...@@ -90,6 +90,7 @@ class BoolObjectClosure; ...@@ -90,6 +90,7 @@ class BoolObjectClosure;
class ThreadClosure; class ThreadClosure;
class outputStream; class outputStream;
class G1StringDedupTable; class G1StringDedupTable;
class G1GCPhaseTimes;
// //
// Main interface for interacting with string deduplication. // Main interface for interacting with string deduplication.
...@@ -130,7 +131,7 @@ public: ...@@ -130,7 +131,7 @@ public:
static void oops_do(OopClosure* keep_alive); static void oops_do(OopClosure* keep_alive);
static void unlink(BoolObjectClosure* is_alive); static void unlink(BoolObjectClosure* is_alive);
static void unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, static void unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive,
bool allow_resize_and_rehash = true); bool allow_resize_and_rehash, G1GCPhaseTimes* phase_times = NULL);
static void threads_do(ThreadClosure* tc); static void threads_do(ThreadClosure* tc);
static void print_worker_threads_on(outputStream* st); static void print_worker_threads_on(outputStream* st);
......
...@@ -217,14 +217,6 @@ ...@@ -217,14 +217,6 @@
product(uintx, G1HeapRegionSize, 0, \ product(uintx, G1HeapRegionSize, 0, \
"Size of the G1 regions.") \ "Size of the G1 regions.") \
\ \
experimental(bool, G1UseParallelRSetUpdating, true, \
"Enables the parallelization of remembered set updating " \
"during evacuation pauses") \
\
experimental(bool, G1UseParallelRSetScanning, true, \
"Enables the parallelization of remembered set scanning " \
"during evacuation pauses") \
\
product(uintx, G1ConcRefinementThreads, 0, \ product(uintx, G1ConcRefinementThreads, 0, \
"If non-0 is the number of parallel rem set update threads, " \ "If non-0 is the number of parallel rem set update threads, " \
"otherwise the value is determined ergonomically.") \ "otherwise the value is determined ergonomically.") \
......
/* /*
* Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -47,93 +47,55 @@ size_t HeapRegion::GrainWords = 0; ...@@ -47,93 +47,55 @@ size_t HeapRegion::GrainWords = 0;
size_t HeapRegion::CardsPerRegion = 0; size_t HeapRegion::CardsPerRegion = 0;
HeapRegionDCTOC::HeapRegionDCTOC(G1CollectedHeap* g1, HeapRegionDCTOC::HeapRegionDCTOC(G1CollectedHeap* g1,
HeapRegion* hr, ExtendedOopClosure* cl, HeapRegion* hr,
CardTableModRefBS::PrecisionStyle precision, G1ParPushHeapRSClosure* cl,
FilterKind fk) : CardTableModRefBS::PrecisionStyle precision) :
DirtyCardToOopClosure(hr, cl, precision, NULL), DirtyCardToOopClosure(hr, cl, precision, NULL),
_hr(hr), _fk(fk), _g1(g1) { } _hr(hr), _rs_scan(cl), _g1(g1) { }
FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r, FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r,
OopClosure* oc) : OopClosure* oc) :
_r_bottom(r->bottom()), _r_end(r->end()), _oc(oc) { } _r_bottom(r->bottom()), _r_end(r->end()), _oc(oc) { }
template<class ClosureType>
HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h,
HeapRegion* hr,
HeapWord* cur, HeapWord* top) {
oop cur_oop = oop(cur);
size_t oop_size = hr->block_size(cur);
HeapWord* next_obj = cur + oop_size;
while (next_obj < top) {
// Keep filtering the remembered set.
if (!g1h->is_obj_dead(cur_oop, hr)) {
// Bottom lies entirely below top, so we can call the
// non-memRegion version of oop_iterate below.
cur_oop->oop_iterate(cl);
}
cur = next_obj;
cur_oop = oop(cur);
oop_size = hr->block_size(cur);
next_obj = cur + oop_size;
}
return cur;
}
void HeapRegionDCTOC::walk_mem_region(MemRegion mr, void HeapRegionDCTOC::walk_mem_region(MemRegion mr,
HeapWord* bottom, HeapWord* bottom,
HeapWord* top) { HeapWord* top) {
G1CollectedHeap* g1h = _g1; G1CollectedHeap* g1h = _g1;
size_t oop_size; size_t oop_size;
ExtendedOopClosure* cl2 = NULL; HeapWord* cur = bottom;
FilterIntoCSClosure intoCSFilt(this, g1h, _cl);
FilterOutOfRegionClosure outOfRegionFilt(_hr, _cl);
switch (_fk) {
case NoFilterKind: cl2 = _cl; break;
case IntoCSFilterKind: cl2 = &intoCSFilt; break;
case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break;
default: ShouldNotReachHere();
}
// Start filtering what we add to the remembered set. If the object is // Start filtering what we add to the remembered set. If the object is
// not considered dead, either because it is marked (in the mark bitmap) // not considered dead, either because it is marked (in the mark bitmap)
// or it was allocated after marking finished, then we add it. Otherwise // or it was allocated after marking finished, then we add it. Otherwise
// we can safely ignore the object. // we can safely ignore the object.
if (!g1h->is_obj_dead(oop(bottom), _hr)) { if (!g1h->is_obj_dead(oop(cur), _hr)) {
oop_size = oop(bottom)->oop_iterate(cl2, mr); oop_size = oop(cur)->oop_iterate(_rs_scan, mr);
} else { } else {
oop_size = _hr->block_size(bottom); oop_size = _hr->block_size(cur);
} }
bottom += oop_size; cur += oop_size;
if (bottom < top) { if (cur < top) {
// We replicate the loop below for several kinds of possible filters. oop cur_oop = oop(cur);
switch (_fk) { oop_size = _hr->block_size(cur);
case NoFilterKind: HeapWord* next_obj = cur + oop_size;
bottom = walk_mem_region_loop(_cl, g1h, _hr, bottom, top); while (next_obj < top) {
break; // Keep filtering the remembered set.
if (!g1h->is_obj_dead(cur_oop, _hr)) {
case IntoCSFilterKind: { // Bottom lies entirely below top, so we can call the
FilterIntoCSClosure filt(this, g1h, _cl); // non-memRegion version of oop_iterate below.
bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top); cur_oop->oop_iterate(_rs_scan);
break; }
} cur = next_obj;
cur_oop = oop(cur);
case OutOfRegionFilterKind: { oop_size = _hr->block_size(cur);
FilterOutOfRegionClosure filt(_hr, _cl); next_obj = cur + oop_size;
bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
break;
}
default:
ShouldNotReachHere();
} }
// Last object. Need to do dead-obj filtering here too. // Last object. Need to do dead-obj filtering here too.
if (!g1h->is_obj_dead(oop(bottom), _hr)) { if (!g1h->is_obj_dead(oop(cur), _hr)) {
oop(bottom)->oop_iterate(cl2, mr); oop(cur)->oop_iterate(_rs_scan, mr);
} }
} }
} }
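
The rewritten walk_mem_region above steps through the region object by object using block_size() and applies the scan closure only to objects that are not dead. A simplified, self-contained sketch of that walk over a fake word-array heap; the encoding and the block_size/is_obj_dead helpers are assumptions made for the example:

#include <cstdio>
#include <vector>

// Each object is encoded as: [size in words][live flag][payload...].
using HeapWord = long;

static std::size_t block_size(const std::vector<HeapWord>& heap, std::size_t cur) {
  return static_cast<std::size_t>(heap[cur]);
}
static bool is_obj_dead(const std::vector<HeapWord>& heap, std::size_t cur) {
  return heap[cur + 1] == 0;
}

// Walk [bottom, top) object by object, visiting only live objects,
// loosely mirroring the loop structure of HeapRegionDCTOC::walk_mem_region.
void walk_mem_region_sketch(const std::vector<HeapWord>& heap,
                            std::size_t bottom, std::size_t top) {
  std::size_t cur = bottom;
  while (cur < top) {
    std::size_t size = block_size(heap, cur);
    if (!is_obj_dead(heap, cur)) {
      std::printf("visit live object at %zu (size %zu words)\n", cur, size);
    }
    cur += size;   // advance to the next object header
  }
}

int main() {
  // Three objects: live (4 words), dead (3 words), live (5 words).
  std::vector<HeapWord> heap = {4, 1, 0, 0,  3, 0, 0,  5, 1, 0, 0, 0};
  walk_mem_region_sketch(heap, 0, heap.size());
}
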
...@@ -338,7 +300,7 @@ void HeapRegion::initialize(MemRegion mr, bool clear_space, bool mangle_space) { ...@@ -338,7 +300,7 @@ void HeapRegion::initialize(MemRegion mr, bool clear_space, bool mangle_space) {
_orig_end = mr.end(); _orig_end = mr.end();
hr_clear(false /*par*/, false /*clear_space*/); hr_clear(false /*par*/, false /*clear_space*/);
set_top(bottom()); set_top(bottom());
record_top_and_timestamp(); record_timestamp();
} }
CompactibleSpace* HeapRegion::next_compaction_space() const { CompactibleSpace* HeapRegion::next_compaction_space() const {
...@@ -426,9 +388,9 @@ oops_on_card_seq_iterate_careful(MemRegion mr, ...@@ -426,9 +388,9 @@ oops_on_card_seq_iterate_careful(MemRegion mr,
// If we're within a stop-world GC, then we might look at a card in a // If we're within a stop-world GC, then we might look at a card in a
// GC alloc region that extends onto a GC LAB, which may not be // GC alloc region that extends onto a GC LAB, which may not be
// parseable. Stop such at the "saved_mark" of the region. // parseable. Stop such at the "scan_top" of the region.
if (g1h->is_gc_active()) { if (g1h->is_gc_active()) {
mr = mr.intersection(used_region_at_save_marks()); mr = mr.intersection(MemRegion(bottom(), scan_top()));
} else { } else {
mr = mr.intersection(used_region()); mr = mr.intersection(used_region());
} }
...@@ -468,7 +430,7 @@ oops_on_card_seq_iterate_careful(MemRegion mr, ...@@ -468,7 +430,7 @@ oops_on_card_seq_iterate_careful(MemRegion mr,
oop obj; oop obj;
HeapWord* next = cur; HeapWord* next = cur;
while (next <= start) { do {
cur = next; cur = next;
obj = oop(cur); obj = oop(cur);
if (obj->klass_or_null() == NULL) { if (obj->klass_or_null() == NULL) {
...@@ -477,45 +439,38 @@ oops_on_card_seq_iterate_careful(MemRegion mr, ...@@ -477,45 +439,38 @@ oops_on_card_seq_iterate_careful(MemRegion mr,
} }
// Otherwise... // Otherwise...
next = cur + block_size(cur); next = cur + block_size(cur);
} } while (next <= start);
// If we finish the above loop...We have a parseable object that // If we finish the above loop...We have a parseable object that
// begins on or before the start of the memory region, and ends // begins on or before the start of the memory region, and ends
// inside or spans the entire region. // inside or spans the entire region.
assert(obj == oop(cur), "sanity");
assert(cur <= start, "Loop postcondition"); assert(cur <= start, "Loop postcondition");
assert(obj->klass_or_null() != NULL, "Loop postcondition"); assert(obj->klass_or_null() != NULL, "Loop postcondition");
assert((cur + block_size(cur)) > start, "Loop postcondition");
if (!g1h->is_obj_dead(obj)) {
obj->oop_iterate(cl, mr);
}
while (cur < end) { do {
obj = oop(cur); obj = oop(cur);
assert((cur + block_size(cur)) > (HeapWord*)obj, "Loop invariant");
if (obj->klass_or_null() == NULL) { if (obj->klass_or_null() == NULL) {
// Ran into an unparseable point. // Ran into an unparseable point.
return cur; return cur;
}; }
// Otherwise: // Advance the current pointer. "obj" still points to the object to iterate.
next = cur + block_size(cur); cur = cur + block_size(cur);
if (!g1h->is_obj_dead(obj)) { if (!g1h->is_obj_dead(obj)) {
if (next < end || !obj->is_objArray()) { // Non-objArrays are sometimes marked imprecise at the object start. We
// This object either does not span the MemRegion // always need to iterate over them in full.
// boundary, or if it does it's not an array. // We only iterate over object arrays in full if they are completely contained
// Apply closure to whole object. // in the memory region.
if (!obj->is_objArray() || (((HeapWord*)obj) >= start && cur <= end)) {
obj->oop_iterate(cl); obj->oop_iterate(cl);
} else { } else {
// This obj is an array that spans the boundary.
// Stop at the boundary.
obj->oop_iterate(cl, mr); obj->oop_iterate(cl, mr);
} }
} }
cur = next; } while (cur < end);
}
return NULL; return NULL;
} }
...@@ -980,7 +935,7 @@ void HeapRegion::verify() const { ...@@ -980,7 +935,7 @@ void HeapRegion::verify() const {
void G1OffsetTableContigSpace::clear(bool mangle_space) { void G1OffsetTableContigSpace::clear(bool mangle_space) {
set_top(bottom()); set_top(bottom());
set_saved_mark_word(bottom()); _scan_top = bottom();
CompactibleSpace::clear(mangle_space); CompactibleSpace::clear(mangle_space);
reset_bot(); reset_bot();
} }
...@@ -1012,41 +967,42 @@ HeapWord* G1OffsetTableContigSpace::cross_threshold(HeapWord* start, ...@@ -1012,41 +967,42 @@ HeapWord* G1OffsetTableContigSpace::cross_threshold(HeapWord* start,
return _offsets.threshold(); return _offsets.threshold();
} }
HeapWord* G1OffsetTableContigSpace::saved_mark_word() const { HeapWord* G1OffsetTableContigSpace::scan_top() const {
G1CollectedHeap* g1h = G1CollectedHeap::heap(); G1CollectedHeap* g1h = G1CollectedHeap::heap();
assert( _gc_time_stamp <= g1h->get_gc_time_stamp(), "invariant" );
HeapWord* local_top = top(); HeapWord* local_top = top();
OrderAccess::loadload(); OrderAccess::loadload();
if (_gc_time_stamp < g1h->get_gc_time_stamp()) { const unsigned local_time_stamp = _gc_time_stamp;
assert(local_time_stamp <= g1h->get_gc_time_stamp(), "invariant");
if (local_time_stamp < g1h->get_gc_time_stamp()) {
return local_top; return local_top;
} else { } else {
return Space::saved_mark_word(); return _scan_top;
} }
} }
void G1OffsetTableContigSpace::record_top_and_timestamp() { void G1OffsetTableContigSpace::record_timestamp() {
G1CollectedHeap* g1h = G1CollectedHeap::heap(); G1CollectedHeap* g1h = G1CollectedHeap::heap();
unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp(); unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp();
if (_gc_time_stamp < curr_gc_time_stamp) { if (_gc_time_stamp < curr_gc_time_stamp) {
// The order of these is important, as another thread might be // Setting the time stamp here tells concurrent readers to look at
// about to start scanning this region. If it does so after // scan_top to know the maximum allowed address to look at.
// set_saved_mark and before _gc_time_stamp = ..., then the latter
// will be false, and it will pick up top() as the high water mark // scan_top should be bottom for all regions except for the
// of region. If it does so after _gc_time_stamp = ..., then it // retained old alloc region which should have scan_top == top
// will pick up the right saved_mark_word() as the high water mark HeapWord* st = _scan_top;
// of the region. Either way, the behaviour will be correct. guarantee(st == _bottom || st == _top, "invariant");
Space::set_saved_mark_word(top());
OrderAccess::storestore();
_gc_time_stamp = curr_gc_time_stamp; _gc_time_stamp = curr_gc_time_stamp;
// No need to do another barrier to flush the writes above. If
// this is called in parallel with other threads trying to
// allocate into the region, the caller should call this while
// holding a lock and when the lock is released the writes will be
// flushed.
} }
} }
void G1OffsetTableContigSpace::record_retained_region() {
// scan_top is the maximum address where it's safe for the next gc to
// scan this region.
_scan_top = top();
}
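
record_timestamp and record_retained_region above publish a per-region scan limit guarded by the GC time stamp: readers may scan up to top() for regions whose stamp is stale, but only up to the saved scan_top for current GC alloc regions. A rough sketch of that reader-side check, using std::atomic in place of HotSpot's OrderAccess barriers; all names and the memory-ordering choices are illustrative assumptions:

#include <atomic>
#include <cstdio>

struct RegionSketch {
  std::atomic<unsigned> gc_time_stamp{0};
  std::atomic<long>     top{0};        // grows as GC workers allocate into it
  long                  scan_top = 0;  // safe scan limit saved for retained regions

  long safe_scan_limit(unsigned current_heap_stamp) const {
    long local_top = top.load(std::memory_order_acquire);
    unsigned local_stamp = gc_time_stamp.load(std::memory_order_acquire);
    if (local_stamp < current_heap_stamp) {
      return local_top;   // not a GC alloc region in this pause: top is safe
    }
    return scan_top;      // GC alloc region: only scan up to the saved limit
  }
};

int main() {
  RegionSketch r;
  const unsigned heap_stamp = 7;

  // Ordinary region: stamp is stale, so top() is the scan limit.
  r.top.store(120, std::memory_order_release);
  std::printf("ordinary region limit = %ld\n", r.safe_scan_limit(heap_stamp));

  // Retained GC alloc region: scan_top was recorded before the stamp was bumped.
  r.scan_top = 120;
  r.gc_time_stamp.store(heap_stamp, std::memory_order_release);
  r.top.store(200, std::memory_order_release);  // new allocations past scan_top
  std::printf("gc alloc region limit = %ld\n", r.safe_scan_limit(heap_stamp));
}
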
void G1OffsetTableContigSpace::safe_object_iterate(ObjectClosure* blk) { void G1OffsetTableContigSpace::safe_object_iterate(ObjectClosure* blk) {
object_iterate(blk); object_iterate(blk);
} }
...@@ -1080,6 +1036,8 @@ G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray, ...@@ -1080,6 +1036,8 @@ G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
void G1OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space, bool mangle_space) { void G1OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space, bool mangle_space) {
CompactibleSpace::initialize(mr, clear_space, mangle_space); CompactibleSpace::initialize(mr, clear_space, mangle_space);
_top = bottom(); _top = bottom();
_scan_top = bottom();
set_saved_mark_word(NULL);
reset_bot(); reset_bot();
} }
...@@ -67,17 +67,9 @@ class nmethod; ...@@ -67,17 +67,9 @@ class nmethod;
// sets. // sets.
class HeapRegionDCTOC : public DirtyCardToOopClosure { class HeapRegionDCTOC : public DirtyCardToOopClosure {
public: private:
// Specification of possible DirtyCardToOopClosure filtering.
enum FilterKind {
NoFilterKind,
IntoCSFilterKind,
OutOfRegionFilterKind
};
protected:
HeapRegion* _hr; HeapRegion* _hr;
FilterKind _fk; G1ParPushHeapRSClosure* _rs_scan;
G1CollectedHeap* _g1; G1CollectedHeap* _g1;
// Walk the given memory region from bottom to (actual) top // Walk the given memory region from bottom to (actual) top
...@@ -90,9 +82,9 @@ protected: ...@@ -90,9 +82,9 @@ protected:
public: public:
HeapRegionDCTOC(G1CollectedHeap* g1, HeapRegionDCTOC(G1CollectedHeap* g1,
HeapRegion* hr, ExtendedOopClosure* cl, HeapRegion* hr,
CardTableModRefBS::PrecisionStyle precision, G1ParPushHeapRSClosure* cl,
FilterKind fk); CardTableModRefBS::PrecisionStyle precision);
}; };
// The complicating factor is that BlockOffsetTable diverged // The complicating factor is that BlockOffsetTable diverged
...@@ -101,28 +93,25 @@ public: ...@@ -101,28 +93,25 @@ public:
// OffsetTableContigSpace. If the two versions of BlockOffsetTable could // OffsetTableContigSpace. If the two versions of BlockOffsetTable could
// be reconciled, then G1OffsetTableContigSpace could go away. // be reconciled, then G1OffsetTableContigSpace could go away.
// The idea behind time stamps is the following. Doing a save_marks on // The idea behind time stamps is the following. We want to keep track of
// all regions at every GC pause is time consuming (if I remember // the highest address where it's safe to scan objects for each region.
// well, 10ms or so). So, we would like to do that only for regions // This is only relevant for current GC alloc regions so we keep a time stamp
// that are GC alloc regions. To achieve this, we use time // per region to determine if the region has been allocated during the current
// stamps. For every evacuation pause, G1CollectedHeap generates a // GC or not. If the time stamp is current we report a scan_top value which
// unique time stamp (essentially a counter that gets // was saved at the end of the previous GC for retained alloc regions and which is
// incremented). Every time we want to call save_marks on a region, // equal to the bottom for all other regions.
// we set the saved_mark_word to top and also copy the current GC // There is a race between card scanners and allocating gc workers where we must ensure
// time stamp to the time stamp field of the space. Reading the // that card scanners do not read the memory allocated by the gc workers.
// saved_mark_word involves checking the time stamp of the // In order to enforce that, we must not return a value of _top which is more recent than the
// region. If it is the same as the current GC time stamp, then we // time stamp. This is due to the fact that a region may become a gc alloc region at
// can safely read the saved_mark_word field, as it is valid. If the // some point after we've read the timestamp value as being < the current time stamp.
// time stamp of the region is not the same as the current GC time // The time stamps are re-initialized to zero at cleanup and at Full GCs.
// stamp, then we instead read top, as the saved_mark_word field is // The current scheme that uses sequential unsigned ints will fail only if we have 4b
// invalid. Time stamps (on the regions and also on the
// G1CollectedHeap) are reset at every cleanup (we iterate over
// the regions anyway) and at the end of a Full GC. The current scheme
// that uses sequential unsigned ints will fail only if we have 4b
// evacuation pauses between two cleanups, which is _highly_ unlikely. // evacuation pauses between two cleanups, which is _highly_ unlikely.
class G1OffsetTableContigSpace: public CompactibleSpace { class G1OffsetTableContigSpace: public CompactibleSpace {
friend class VMStructs; friend class VMStructs;
HeapWord* _top; HeapWord* _top;
HeapWord* volatile _scan_top;
protected: protected:
G1BlockOffsetArrayContigSpace _offsets; G1BlockOffsetArrayContigSpace _offsets;
Mutex _par_alloc_lock; Mutex _par_alloc_lock;
...@@ -166,10 +155,11 @@ class G1OffsetTableContigSpace: public CompactibleSpace { ...@@ -166,10 +155,11 @@ class G1OffsetTableContigSpace: public CompactibleSpace {
void set_bottom(HeapWord* value); void set_bottom(HeapWord* value);
void set_end(HeapWord* value); void set_end(HeapWord* value);
virtual HeapWord* saved_mark_word() const; HeapWord* scan_top() const;
void record_top_and_timestamp(); void record_timestamp();
void reset_gc_time_stamp() { _gc_time_stamp = 0; } void reset_gc_time_stamp() { _gc_time_stamp = 0; }
unsigned get_gc_time_stamp() { return _gc_time_stamp; } unsigned get_gc_time_stamp() { return _gc_time_stamp; }
void record_retained_region();
// See the comment above in the declaration of _pre_dummy_top for an // See the comment above in the declaration of _pre_dummy_top for an
// explanation of what it is. // explanation of what it is.
...@@ -193,6 +183,8 @@ class G1OffsetTableContigSpace: public CompactibleSpace { ...@@ -193,6 +183,8 @@ class G1OffsetTableContigSpace: public CompactibleSpace {
virtual HeapWord* allocate(size_t word_size); virtual HeapWord* allocate(size_t word_size);
HeapWord* par_allocate(size_t word_size); HeapWord* par_allocate(size_t word_size);
HeapWord* saved_mark_word() const { ShouldNotReachHere(); return NULL; }
// MarkSweep support phase3 // MarkSweep support phase3
virtual HeapWord* initialize_threshold(); virtual HeapWord* initialize_threshold();
virtual HeapWord* cross_threshold(HeapWord* start, HeapWord* end); virtual HeapWord* cross_threshold(HeapWord* start, HeapWord* end);
......
...@@ -622,7 +622,7 @@ void ParNewGenTask::work(uint worker_id) { ...@@ -622,7 +622,7 @@ void ParNewGenTask::work(uint worker_id) {
true, // Process younger gens, if any, true, // Process younger gens, if any,
// as strong roots. // as strong roots.
false, // no scope; this is parallel code false, // no scope; this is parallel code
SharedHeap::SO_ScavengeCodeCache, GenCollectedHeap::SO_ScavengeCodeCache,
GenCollectedHeap::StrongAndWeakRoots, GenCollectedHeap::StrongAndWeakRoots,
&par_scan_state.to_space_root_closure(), &par_scan_state.to_space_root_closure(),
&par_scan_state.older_gen_closure(), &par_scan_state.older_gen_closure(),
......
...@@ -55,7 +55,10 @@ class ageTable VALUE_OBJ_CLASS_SPEC { ...@@ -55,7 +55,10 @@ class ageTable VALUE_OBJ_CLASS_SPEC {
// add entry // add entry
void add(oop p, size_t oop_size) { void add(oop p, size_t oop_size) {
uint age = p->age(); add(p->age(), oop_size);
}
void add(uint age, size_t oop_size) {
assert(age > 0 && age < table_size, "invalid age of object"); assert(age > 0 && age < table_size, "invalid age of object");
sizes[age] += oop_size; sizes[age] += oop_size;
} }
......
/* /*
* Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -420,6 +420,7 @@ class Bytecodes: AllStatic { ...@@ -420,6 +420,7 @@ class Bytecodes: AllStatic {
static bool is_astore (Code code) { return (code == _astore || code == _astore_0 || code == _astore_1 static bool is_astore (Code code) { return (code == _astore || code == _astore_0 || code == _astore_1
|| code == _astore_2 || code == _astore_3); } || code == _astore_2 || code == _astore_3); }
static bool is_store_into_local(Code code){ return (_istore <= code && code <= _astore_3); }
static bool is_const (Code code) { return (_aconst_null <= code && code <= _ldc2_w); } static bool is_const (Code code) { return (_aconst_null <= code && code <= _ldc2_w); }
static bool is_zero_const (Code code) { return (code == _aconst_null || code == _iconst_0 static bool is_zero_const (Code code) { return (code == _aconst_null || code == _iconst_0
|| code == _fconst_0 || code == _dconst_0); } || code == _fconst_0 || code == _dconst_0); }
......
...@@ -244,10 +244,8 @@ void InterpreterOopMap::print() const { ...@@ -244,10 +244,8 @@ void InterpreterOopMap::print() const {
method()->print_value(); method()->print_value();
tty->print(" @ %d = [%d] { ", bci(), n); tty->print(" @ %d = [%d] { ", bci(), n);
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
#ifdef ENABLE_ZAP_DEAD_LOCALS
if (is_dead(i)) tty->print("%d+ ", i); if (is_dead(i)) tty->print("%d+ ", i);
else else
#endif
if (is_oop(i)) tty->print("%d ", i); if (is_oop(i)) tty->print("%d ", i);
} }
tty->print_cr("}"); tty->print_cr("}");
...@@ -402,13 +400,11 @@ void OopMapCacheEntry::set_mask(CellTypeState *vars, CellTypeState *stack, int s ...@@ -402,13 +400,11 @@ void OopMapCacheEntry::set_mask(CellTypeState *vars, CellTypeState *stack, int s
value |= (mask << oop_bit_number ); value |= (mask << oop_bit_number );
} }
#ifdef ENABLE_ZAP_DEAD_LOCALS
// set dead bit // set dead bit
if (!cell->is_live()) { if (!cell->is_live()) {
value |= (mask << dead_bit_number); value |= (mask << dead_bit_number);
assert(!cell->is_reference(), "dead value marked as oop"); assert(!cell->is_reference(), "dead value marked as oop");
} }
#endif
} }
// make sure last word is stored // make sure last word is stored
......
...@@ -66,19 +66,15 @@ class InterpreterOopMap: ResourceObj { ...@@ -66,19 +66,15 @@ class InterpreterOopMap: ResourceObj {
public: public:
enum { enum {
N = 2, // the number of words reserved N = 4, // the number of words reserved
// for inlined mask storage // for inlined mask storage
small_mask_limit = N * BitsPerWord, // the maximum number of bits small_mask_limit = N * BitsPerWord, // the maximum number of bits
// available for small masks, // available for small masks,
// small_mask_limit can be set to 0 // small_mask_limit can be set to 0
// for testing bit_mask allocation // for testing bit_mask allocation
#ifdef ENABLE_ZAP_DEAD_LOCALS
bits_per_entry = 2, bits_per_entry = 2,
dead_bit_number = 1, dead_bit_number = 1,
#else
bits_per_entry = 1,
#endif
oop_bit_number = 0 oop_bit_number = 0
}; };
...@@ -119,10 +115,6 @@ class InterpreterOopMap: ResourceObj { ...@@ -119,10 +115,6 @@ class InterpreterOopMap: ResourceObj {
void set_expression_stack_size(int sz) { _expression_stack_size = sz; } void set_expression_stack_size(int sz) { _expression_stack_size = sz; }
#ifdef ENABLE_ZAP_DEAD_LOCALS
bool is_dead(int offset) const { return (entry_at(offset) & (1 << dead_bit_number)) != 0; }
#endif
// Lookup // Lookup
bool match(methodHandle method, int bci) const { return _method == method() && _bci == bci; } bool match(methodHandle method, int bci) const { return _method == method() && _bci == bci; }
bool is_empty() const; bool is_empty() const;
...@@ -144,6 +136,7 @@ class InterpreterOopMap: ResourceObj { ...@@ -144,6 +136,7 @@ class InterpreterOopMap: ResourceObj {
void print() const; void print() const;
int number_of_entries() const { return mask_size() / bits_per_entry; } int number_of_entries() const { return mask_size() / bits_per_entry; }
bool is_dead(int offset) const { return (entry_at(offset) & (1 << dead_bit_number)) != 0; }
bool is_oop (int offset) const { return (entry_at(offset) & (1 << oop_bit_number )) != 0; } bool is_oop (int offset) const { return (entry_at(offset) & (1 << oop_bit_number )) != 0; }
int expression_stack_size() const { return _expression_stack_size; } int expression_stack_size() const { return _expression_stack_size; }
......
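
The InterpreterOopMap hunk above moves to a fixed two-bits-per-entry mask (oop bit 0, dead bit 1) and exposes is_dead() unconditionally. A small standalone sketch of packing and testing such entries; the helper names are invented for the example:

#include <cstdint>
#include <cstdio>

enum { bits_per_entry = 2, oop_bit_number = 0, dead_bit_number = 1 };

static std::uint64_t set_entry(std::uint64_t mask, int slot, bool oop, bool dead) {
  std::uint64_t bits = (oop  ? 1u << oop_bit_number  : 0u) |
                       (dead ? 1u << dead_bit_number : 0u);
  return mask | (bits << (slot * bits_per_entry));
}
static bool is_oop(std::uint64_t mask, int slot) {
  return ((mask >> (slot * bits_per_entry)) & (1u << oop_bit_number)) != 0;
}
static bool is_dead(std::uint64_t mask, int slot) {
  return ((mask >> (slot * bits_per_entry)) & (1u << dead_bit_number)) != 0;
}

int main() {
  std::uint64_t mask = 0;
  mask = set_entry(mask, 0, true,  false);  // slot 0 holds a live oop
  mask = set_entry(mask, 1, false, true);   // slot 1 is a dead (non-oop) value
  std::printf("slot0: oop=%d dead=%d\n", is_oop(mask, 0), is_dead(mask, 0));
  std::printf("slot1: oop=%d dead=%d\n", is_oop(mask, 1), is_dead(mask, 1));
}
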
...@@ -629,7 +629,7 @@ void DefNewGeneration::collect(bool full, ...@@ -629,7 +629,7 @@ void DefNewGeneration::collect(bool full,
true, // Process younger gens, if any, true, // Process younger gens, if any,
// as strong roots. // as strong roots.
true, // activate StrongRootsScope true, // activate StrongRootsScope
SharedHeap::SO_ScavengeCodeCache, GenCollectedHeap::SO_ScavengeCodeCache,
GenCollectedHeap::StrongAndWeakRoots, GenCollectedHeap::StrongAndWeakRoots,
&fsc_with_no_gc_barrier, &fsc_with_no_gc_barrier,
&fsc_with_gc_barrier, &fsc_with_gc_barrier,
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "classfile/symbolTable.hpp" #include "classfile/symbolTable.hpp"
#include "classfile/systemDictionary.hpp" #include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp" #include "classfile/vmSymbols.hpp"
#include "code/codeCache.hpp"
#include "code/icBuffer.hpp" #include "code/icBuffer.hpp"
#include "gc_implementation/shared/collectorCounters.hpp" #include "gc_implementation/shared/collectorCounters.hpp"
#include "gc_implementation/shared/gcTrace.hpp" #include "gc_implementation/shared/gcTrace.hpp"
...@@ -49,6 +50,7 @@ ...@@ -49,6 +50,7 @@
#include "runtime/handles.inline.hpp" #include "runtime/handles.inline.hpp"
#include "runtime/java.hpp" #include "runtime/java.hpp"
#include "runtime/vmThread.hpp" #include "runtime/vmThread.hpp"
#include "services/management.hpp"
#include "services/memoryService.hpp" #include "services/memoryService.hpp"
#include "utilities/vmError.hpp" #include "utilities/vmError.hpp"
#include "utilities/workgroup.hpp" #include "utilities/workgroup.hpp"
...@@ -63,7 +65,15 @@ NOT_PRODUCT(size_t GenCollectedHeap::_skip_header_HeapWords = 0;) ...@@ -63,7 +65,15 @@ NOT_PRODUCT(size_t GenCollectedHeap::_skip_header_HeapWords = 0;)
// The set of potentially parallel tasks in root scanning. // The set of potentially parallel tasks in root scanning.
enum GCH_strong_roots_tasks { enum GCH_strong_roots_tasks {
// We probably want to parallelize both of these internally, but for now... GCH_PS_Universe_oops_do,
GCH_PS_JNIHandles_oops_do,
GCH_PS_ObjectSynchronizer_oops_do,
GCH_PS_FlatProfiler_oops_do,
GCH_PS_Management_oops_do,
GCH_PS_SystemDictionary_oops_do,
GCH_PS_ClassLoaderDataGraph_oops_do,
GCH_PS_jvmti_oops_do,
GCH_PS_CodeCache_oops_do,
GCH_PS_younger_gens, GCH_PS_younger_gens,
// Leave this one last. // Leave this one last.
GCH_PS_NumElements GCH_PS_NumElements
...@@ -72,13 +82,9 @@ enum GCH_strong_roots_tasks { ...@@ -72,13 +82,9 @@ enum GCH_strong_roots_tasks {
GenCollectedHeap::GenCollectedHeap(GenCollectorPolicy *policy) : GenCollectedHeap::GenCollectedHeap(GenCollectorPolicy *policy) :
SharedHeap(policy), SharedHeap(policy),
_gen_policy(policy), _gen_policy(policy),
_gen_process_roots_tasks(new SubTasksDone(GCH_PS_NumElements)), _process_strong_tasks(new SubTasksDone(GCH_PS_NumElements)),
_full_collections_completed(0) _full_collections_completed(0)
{ {
if (_gen_process_roots_tasks == NULL ||
!_gen_process_roots_tasks->valid()) {
vm_exit_during_initialization("Failed necessary allocation.");
}
 assert(policy != NULL, "Sanity check");
 }
@@ -589,29 +595,137 @@ HeapWord* GenCollectedHeap::satisfy_failed_allocation(size_t size, bool is_tlab)
 void GenCollectedHeap::set_par_threads(uint t) {
   SharedHeap::set_par_threads(t);
-  _gen_process_roots_tasks->set_n_threads(t);
+  set_n_termination(t);
+}
+
+void GenCollectedHeap::set_n_termination(uint t) {
+  _process_strong_tasks->set_n_threads(t);
 }

-void GenCollectedHeap::
-gen_process_roots(int level,
-                  bool younger_gens_as_roots,
-                  bool activate_scope,
-                  SharedHeap::ScanningOption so,
-                  OopsInGenClosure* not_older_gens,
-                  OopsInGenClosure* weak_roots,
-                  OopsInGenClosure* older_gens,
-                  CLDClosure* cld_closure,
-                  CLDClosure* weak_cld_closure,
-                  CodeBlobClosure* code_closure) {
+#ifdef ASSERT
+class AssertNonScavengableClosure: public OopClosure {
+public:
+  virtual void do_oop(oop* p) {
+    assert(!Universe::heap()->is_in_partial_collection(*p),
+      "Referent should not be scavengable.");  }
+  virtual void do_oop(narrowOop* p) { ShouldNotReachHere(); }
+};
+static AssertNonScavengableClosure assert_is_non_scavengable_closure;
+#endif
+
+void GenCollectedHeap::process_roots(bool activate_scope,
+                                     ScanningOption so,
+                                     OopClosure* strong_roots,
+                                     OopClosure* weak_roots,
+                                     CLDClosure* strong_cld_closure,
+                                     CLDClosure* weak_cld_closure,
+                                     CodeBlobClosure* code_roots) {
+  StrongRootsScope srs(this, activate_scope);

   // General roots.
-  SharedHeap::process_roots(activate_scope, so,
-                            not_older_gens, weak_roots,
-                            cld_closure, weak_cld_closure,
-                            code_closure);
+  assert(_strong_roots_parity != 0, "must have called prologue code");
+  assert(code_roots != NULL, "code root closure should always be set");
+  // _n_termination for _process_strong_tasks should be set up stream
+  // in a method not running in a GC worker.  Otherwise the GC worker
+  // could be trying to change the termination condition while the task
+  // is executing in another GC worker.
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_ClassLoaderDataGraph_oops_do)) {
+    ClassLoaderDataGraph::roots_cld_do(strong_cld_closure, weak_cld_closure);
+  }
+
+  // Some CLDs contained in the thread frames should be considered strong.
+  // Don't process them if they will be processed during the ClassLoaderDataGraph phase.
+  CLDClosure* roots_from_clds_p = (strong_cld_closure != weak_cld_closure) ? strong_cld_closure : NULL;
+  // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway
+  CodeBlobClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots;
+
+  Threads::possibly_parallel_oops_do(strong_roots, roots_from_clds_p, roots_from_code_p);
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_Universe_oops_do)) {
+    Universe::oops_do(strong_roots);
+  }
+  // Global (strong) JNI handles
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_JNIHandles_oops_do)) {
+    JNIHandles::oops_do(strong_roots);
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_ObjectSynchronizer_oops_do)) {
+    ObjectSynchronizer::oops_do(strong_roots);
+  }
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_FlatProfiler_oops_do)) {
+    FlatProfiler::oops_do(strong_roots);
+  }
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_Management_oops_do)) {
+    Management::oops_do(strong_roots);
+  }
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_jvmti_oops_do)) {
+    JvmtiExport::oops_do(strong_roots);
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_SystemDictionary_oops_do)) {
+    SystemDictionary::roots_oops_do(strong_roots, weak_roots);
+  }
+
+  // All threads execute the following. A specific chunk of buckets
+  // from the StringTable are the individual tasks.
+  if (weak_roots != NULL) {
+    if (CollectedHeap::use_parallel_gc_threads()) {
+      StringTable::possibly_parallel_oops_do(weak_roots);
+    } else {
+      StringTable::oops_do(weak_roots);
+    }
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_CodeCache_oops_do)) {
+    if (so & SO_ScavengeCodeCache) {
+      assert(code_roots != NULL, "must supply closure for code cache");
+      // We only visit parts of the CodeCache when scavenging.
+      CodeCache::scavenge_root_nmethods_do(code_roots);
+    }
+    if (so & SO_AllCodeCache) {
+      assert(code_roots != NULL, "must supply closure for code cache");
+      // CMSCollector uses this to do intermediate-strength collections.
+      // We scan the entire code cache, since CodeCache::do_unloading is not called.
+      CodeCache::blobs_do(code_roots);
+    }
+    // Verify that the code cache contents are not subject to
+    // movement by a scavenging collection.
+    DEBUG_ONLY(CodeBlobToOopClosure assert_code_is_non_scavengable(&assert_is_non_scavengable_closure, !CodeBlobToOopClosure::FixRelocations));
+    DEBUG_ONLY(CodeCache::asserted_non_scavengable_nmethods_do(&assert_code_is_non_scavengable));
+  }
+}
+
+void GenCollectedHeap::gen_process_roots(int level,
+                                         bool younger_gens_as_roots,
+                                         bool activate_scope,
+                                         ScanningOption so,
+                                         bool only_strong_roots,
+                                         OopsInGenClosure* not_older_gens,
+                                         OopsInGenClosure* older_gens,
+                                         CLDClosure* cld_closure) {
+  const bool is_adjust_phase = !only_strong_roots && !younger_gens_as_roots;
+
+  bool is_moving_collection = false;
+  if (level == 0 || is_adjust_phase) {
+    // young collections are always moving
+    is_moving_collection = true;
+  }
+
+  MarkingCodeBlobClosure mark_code_closure(not_older_gens, is_moving_collection);
+  OopsInGenClosure* weak_roots = only_strong_roots ? NULL : not_older_gens;
+  CLDClosure* weak_cld_closure = only_strong_roots ? NULL : cld_closure;
+
+  process_roots(activate_scope, so,
+                not_older_gens, weak_roots,
+                cld_closure, weak_cld_closure,
+                &mark_code_closure);

   if (younger_gens_as_roots) {
-    if (!_gen_process_roots_tasks->is_task_claimed(GCH_PS_younger_gens)) {
+    if (!_process_strong_tasks->is_task_claimed(GCH_PS_younger_gens)) {
       for (int i = 0; i < level; i++) {
         not_older_gens->set_generation(_gens[i]);
         _gens[i]->oop_iterate(not_older_gens);
@@ -627,43 +741,18 @@ gen_process_roots(int level,
     older_gens->reset_generation();
   }

-  _gen_process_roots_tasks->all_tasks_completed();
+  _process_strong_tasks->all_tasks_completed();
 }

-void GenCollectedHeap::
-gen_process_roots(int level,
-                  bool younger_gens_as_roots,
-                  bool activate_scope,
-                  SharedHeap::ScanningOption so,
-                  bool only_strong_roots,
-                  OopsInGenClosure* not_older_gens,
-                  OopsInGenClosure* older_gens,
-                  CLDClosure* cld_closure) {
-  const bool is_adjust_phase = !only_strong_roots && !younger_gens_as_roots;
-  bool is_moving_collection = false;
-  if (level == 0 || is_adjust_phase) {
-    // young collections are always moving
-    is_moving_collection = true;
-  }
-  MarkingCodeBlobClosure mark_code_closure(not_older_gens, is_moving_collection);
-  CodeBlobClosure* code_closure = &mark_code_closure;
-
-  gen_process_roots(level,
-                    younger_gens_as_roots,
-                    activate_scope, so,
-                    not_older_gens, only_strong_roots ? NULL : not_older_gens,
-                    older_gens,
-                    cld_closure, only_strong_roots ? NULL : cld_closure,
-                    code_closure);
-}
+class AlwaysTrueClosure: public BoolObjectClosure {
+public:
+  bool do_object_b(oop p) { return true; }
+};
+static AlwaysTrueClosure always_true;

 void GenCollectedHeap::gen_process_weak_roots(OopClosure* root_closure) {
-  SharedHeap::process_weak_roots(root_closure);
-  // "Local" "weak" refs
+  JNIHandles::weak_oops_do(&always_true, root_closure);
   for (int i = 0; i < _n_gens; i++) {
     _gens[i]->ref_processor()->weak_oops_do(root_closure);
   }
...
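Note on the hunks above: the new GenCollectedHeap::process_roots() hands out each strong-root group (Universe, JNIHandles, SystemDictionary, the code cache, and so on) through SubTasksDone::is_task_claimed(), so that when several GC workers run the method concurrently, exactly one worker processes each group while the rest skip it. The following self-contained sketch illustrates that claim-once pattern; ClaimTable, the task names, and the worker loop are hypothetical stand-ins for illustration only, not HotSpot code.

// claim_demo.cpp -- standalone illustration of the "claim once" root-task pattern.
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

// Hypothetical stand-in for SubTasksDone: each task id can be claimed by
// exactly one worker; every other worker sees "already claimed".
class ClaimTable {
 public:
  ClaimTable() : _claimed(0u) {}
  // Returns true if some other worker already took this task id.
  bool is_task_claimed(unsigned id) {
    unsigned bit = 1u << id;
    return (_claimed.fetch_or(bit) & bit) != 0;  // atomic test-and-set of one bit
  }
 private:
  std::atomic<unsigned> _claimed;
};

int main() {
  // A few stand-ins for the GCH_PS_* task ids used in process_roots().
  enum { TASK_UNIVERSE, TASK_JNI_HANDLES, TASK_SYSTEM_DICTIONARY, NUM_TASKS };
  ClaimTable tasks;

  auto worker = [&tasks](int wid) {
    for (unsigned t = 0; t < NUM_TASKS; ++t) {
      if (!tasks.is_task_claimed(t)) {
        // Exactly one worker reaches this line for each task id.
        std::printf("worker %d processes task %u\n", wid, t);
      }
    }
  };

  std::vector<std::thread> pool;
  for (int w = 0; w < 4; ++w) {
    pool.emplace_back(worker, w);
  }
  for (std::thread& th : pool) {
    th.join();
  }
  return 0;
}

A single atomic word is enough for this demo; the real SubTasksDone additionally tracks how many threads participate, which is why the diff routes the thread count through the new set_n_termination() before workers start, as the comment in the hunk explains.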
@@ -79,8 +79,7 @@ public:

   // Data structure for claiming the (potentially) parallel tasks in
   // (gen-specific) roots processing.
-  SubTasksDone* _gen_process_roots_tasks;
-  SubTasksDone* gen_process_roots_tasks() { return _gen_process_roots_tasks; }
+  SubTasksDone* _process_strong_tasks;

   // In block contents verification, the number of header words to skip
   NOT_PRODUCT(static size_t _skip_header_HeapWords;)
@@ -390,6 +389,7 @@ public:
   static GenCollectedHeap* heap();

   void set_par_threads(uint t);
+  void set_n_termination(uint t);

   // Invoke the "do_oop" method of one of the closures "not_older_gens"
   // or "older_gens" on root locations for the generation at
@@ -403,11 +403,25 @@ public:
   // The "so" argument determines which of the roots
   // the closure is applied to:
   // "SO_None" does none;
+  enum ScanningOption {
+    SO_None              = 0x0,
+    SO_AllCodeCache      = 0x8,
+    SO_ScavengeCodeCache = 0x10
+  };
+
  private:
+  void process_roots(bool activate_scope,
+                     ScanningOption so,
+                     OopClosure* strong_roots,
+                     OopClosure* weak_roots,
+                     CLDClosure* strong_cld_closure,
+                     CLDClosure* weak_cld_closure,
+                     CodeBlobClosure* code_roots);
+
   void gen_process_roots(int level,
                          bool younger_gens_as_roots,
                          bool activate_scope,
-                         SharedHeap::ScanningOption so,
+                         ScanningOption so,
                          OopsInGenClosure* not_older_gens,
                          OopsInGenClosure* weak_roots,
                          OopsInGenClosure* older_gens,
@@ -422,7 +436,7 @@ public:
   void gen_process_roots(int level,
                          bool younger_gens_as_roots,
                          bool activate_scope,
-                         SharedHeap::ScanningOption so,
+                         ScanningOption so,
                          bool only_strong_roots,
                          OopsInGenClosure* not_older_gens,
                          OopsInGenClosure* older_gens,
...
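The header hunks above move ScanningOption from SharedHeap into GenCollectedHeap and define it as a set of bit flags, which the .cpp hunk tests with expressions like "so & SO_AllCodeCache" and "so & SO_ScavengeCodeCache". A minimal standalone sketch of how such flag values are meant to be tested follows; the describe() helper and its output strings are hypothetical, only the enum values are taken from the hunk.

// scanning_option_demo.cpp -- standalone sketch of the bit-flag test.
#include <cstdio>

enum ScanningOption {          // values copied from the hunk above
  SO_None              = 0x0,
  SO_AllCodeCache      = 0x8,
  SO_ScavengeCodeCache = 0x10
};

// Hypothetical helper: decides which part of the code cache a collection
// visits, the same way process_roots() tests its "so" argument.
static void describe(ScanningOption so) {
  if (so & SO_ScavengeCodeCache) {
    std::puts("visit only scavenge-root nmethods");
  } else if (so & SO_AllCodeCache) {
    std::puts("visit the entire code cache");
  } else {
    std::puts("code cache reached only through thread-stack roots");
  }
}

int main() {
  describe(SO_None);
  describe(SO_ScavengeCodeCache);
  describe(SO_AllCodeCache);
  return 0;
}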
@@ -210,7 +210,7 @@ void GenMarkSweep::mark_sweep_phase1(int level,
   gch->gen_process_roots(level,
                          false, // Younger gens are not roots.
                          true,  // activate StrongRootsScope
-                         SharedHeap::SO_None,
+                         GenCollectedHeap::SO_None,
                          GenCollectedHeap::StrongRootsOnly,
                          &follow_root_closure,
                          &follow_root_closure,
@@ -295,7 +295,7 @@ void GenMarkSweep::mark_sweep_phase3(int level) {
   gch->gen_process_roots(level,
                          false, // Younger gens are not roots.
                          true,  // activate StrongRootsScope
-                         SharedHeap::SO_AllCodeCache,
+                         GenCollectedHeap::SO_AllCodeCache,
                          GenCollectedHeap::StrongAndWeakRoots,
                          &adjust_pointer_closure,
                          &adjust_pointer_closure,
...
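In the two caller hunks above, mark_sweep_phase1 passes GenCollectedHeap::StrongRootsOnly while mark_sweep_phase3 passes StrongAndWeakRoots in the only_strong_roots position, and the .cpp change maps the strong-only case to NULL weak closures before delegating to process_roots(). A simplified, self-contained sketch of that mapping follows; the struct names and printf output are illustrative stand-ins, not the HotSpot closure types.

// strong_vs_weak_demo.cpp -- standalone sketch of the only_strong_roots mapping.
#include <cstdio>

struct OopClosure { const char* name; };   // hypothetical stand-ins
struct CLDClosure { const char* name; };

static void process_roots(OopClosure* strong, OopClosure* weak,
                          CLDClosure* strong_cld, CLDClosure* weak_cld) {
  std::printf("strong oops: %s\n", strong->name);
  std::printf("weak oops:   %s\n", weak ? weak->name : "(skipped)");
  std::printf("strong CLDs: %s\n", strong_cld->name);
  std::printf("weak CLDs:   %s\n", weak_cld ? weak_cld->name : "(skipped)");
}

static void gen_process_roots(bool only_strong_roots,
                              OopClosure* root_closure,
                              CLDClosure* cld_closure) {
  // Mirrors "only_strong_roots ? NULL : ..." in the .cpp hunk.
  OopClosure* weak_roots       = only_strong_roots ? nullptr : root_closure;
  CLDClosure* weak_cld_closure = only_strong_roots ? nullptr : cld_closure;
  process_roots(root_closure, weak_roots, cld_closure, weak_cld_closure);
}

int main() {
  OopClosure follow = { "follow_root_closure" };
  CLDClosure cld    = { "cld_closure" };
  std::puts("-- StrongRootsOnly (mark_sweep_phase1) --");
  gen_process_roots(true, &follow, &cld);
  std::puts("-- StrongAndWeakRoots (mark_sweep_phase3) --");
  gen_process_roots(false, &follow, &cld);
  return 0;
}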
(3 more file diffs collapsed in this view.)
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -137,7 +137,6 @@ extern Mutex* OldSets_lock; // protects the old region sets
 extern Monitor* RootRegionScan_lock;  // used to notify that the CM threads have finished scanning the IM snapshot regions
 extern Mutex*   MMUTracker_lock;      // protects the MMU
                                       // tracker data structures
-extern Mutex*   HotCardCache_lock;    // protects the hot card cache
 extern Mutex*   Management_lock;      // a lock used to serialize JVM management
 extern Monitor* Service_lock;         // a lock used for service thread operation
...
(8 more file diffs collapsed in this view.)