Commit fa7c6955 authored by: J jmasa

Merge

......@@ -262,38 +262,17 @@ CollectionSetChooser::sortMarkedHeapRegions() {
for (int i = 0; i < _numMarkedRegions; i++) {
assert(_markedRegions.at(i) != NULL, "Should be true by sorting!");
_markedRegions.at(i)->set_sort_index(i);
if (G1PrintRegionLivenessInfo > 0) {
if (i == 0) gclog_or_tty->print_cr("Sorted marked regions:");
if (i < G1PrintRegionLivenessInfo ||
(_numMarkedRegions-i) < G1PrintRegionLivenessInfo) {
HeapRegion* hr = _markedRegions.at(i);
size_t u = hr->used();
gclog_or_tty->print_cr(" Region %d: %d used, %d max live, %5.2f%%.",
i, u, hr->max_live_bytes(),
100.0*(float)hr->max_live_bytes()/(float)u);
}
}
if (G1PrintRegionLivenessInfo) {
G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Sorting");
for (int i = 0; i < _numMarkedRegions; ++i) {
HeapRegion* r = _markedRegions.at(i);
cl.doHeapRegion(r);
}
}
if (G1PolicyVerbose > 1)
printSortedHeapRegions();
assert(verify(), "should now be sorted");
}
void
printHeapRegion(HeapRegion *hr) {
if (hr->isHumongous())
gclog_or_tty->print("H: ");
if (hr->in_collection_set())
gclog_or_tty->print("CS: ");
gclog_or_tty->print_cr("Region " PTR_FORMAT " (%s%s) "
"[" PTR_FORMAT ", " PTR_FORMAT"] "
"Used: " SIZE_FORMAT "K, garbage: " SIZE_FORMAT "K.",
hr, hr->is_young() ? "Y " : " ",
hr->is_marked()? "M1" : "M0",
hr->bottom(), hr->end(),
hr->used()/K, hr->garbage_bytes()/K);
}
void
CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) {
assert(!hr->isHumongous(),
......@@ -351,27 +330,9 @@ CollectionSetChooser::clearMarkedHeapRegions(){
void
CollectionSetChooser::updateAfterFullCollection() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
clearMarkedHeapRegions();
}
void
CollectionSetChooser::printSortedHeapRegions() {
gclog_or_tty->print_cr("Printing %d Heap Regions sorted by amount of known garbage",
_numMarkedRegions);
DEBUG_ONLY(int marked_count = 0;)
for (int i = 0; i < _markedRegions.length(); i++) {
HeapRegion* r = _markedRegions.at(i);
if (r != NULL) {
printHeapRegion(r);
DEBUG_ONLY(marked_count++;)
}
}
assert(marked_count == _numMarkedRegions, "must be");
gclog_or_tty->print_cr("Done sorted heap region print");
}
void CollectionSetChooser::removeRegion(HeapRegion *hr) {
int si = hr->sort_index();
assert(si == -1 || hr->is_marked(), "Sort index not valid.");
......
......@@ -100,8 +100,6 @@ public:
CollectionSetChooser();
void printSortedHeapRegions();
void sortMarkedHeapRegions();
void fillCache();
bool addRegionToCache(void);
......
......@@ -31,23 +31,31 @@
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "memory/space.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/java.hpp"
#include "utilities/copy.hpp"
// Possible sizes for the card counts cache: odd primes that roughly double in size.
// (See jvmtiTagMap.cpp).
int ConcurrentG1Refine::_cc_cache_sizes[] = {
16381, 32771, 76831, 150001, 307261,
614563, 1228891, 2457733, 4915219, 9830479,
19660831, 39321619, 78643219, 157286461, -1
#define MAX_SIZE ((size_t) -1)
size_t ConcurrentG1Refine::_cc_cache_sizes[] = {
16381, 32771, 76831, 150001, 307261,
614563, 1228891, 2457733, 4915219, 9830479,
19660831, 39321619, 78643219, 157286461, MAX_SIZE
};
ConcurrentG1Refine::ConcurrentG1Refine() :
_card_counts(NULL), _card_epochs(NULL),
_n_card_counts(0), _max_n_card_counts(0),
_n_card_counts(0), _max_cards(0), _max_n_card_counts(0),
_cache_size_index(0), _expand_card_counts(false),
_hot_cache(NULL),
_def_use_cache(false), _use_cache(false),
_n_periods(0),
// We initialize the epochs of the array to 0. By initializing
// _n_periods to 1 and not 0 we automatically invalidate all the
// entries on the array. Otherwise we might accidentally think that
// we claimed a card that was in fact never set (see CR7033292).
_n_periods(1),
_threads(NULL), _n_threads(0)
{
......@@ -98,27 +106,44 @@ int ConcurrentG1Refine::thread_num() {
void ConcurrentG1Refine::init() {
if (G1ConcRSLogCacheSize > 0) {
_g1h = G1CollectedHeap::heap();
_max_n_card_counts =
(unsigned) (_g1h->max_capacity() >> CardTableModRefBS::card_shift);
size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
guarantee(_max_n_card_counts < max_card_num, "card_num representation");
_max_cards = _g1h->max_capacity() >> CardTableModRefBS::card_shift;
_max_n_card_counts = _max_cards * G1MaxHotCardCountSizePercent / 100;
int desired = _max_n_card_counts / InitialCacheFraction;
for (_cache_size_index = 0;
_cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) {
if (_cc_cache_sizes[_cache_size_index] >= desired) break;
size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
guarantee(_max_cards < max_card_num, "card_num representation");
// We need _n_card_counts to be less than _max_n_card_counts here
// so that the expansion call (below) actually allocates the
// _counts and _epochs arrays.
assert(_n_card_counts == 0, "pre-condition");
assert(_max_n_card_counts > 0, "pre-condition");
// Find the index into the cache size array that is of a size that's
// large enough to hold desired_sz.
size_t desired_sz = _max_cards / InitialCacheFraction;
int desired_sz_index = 0;
while (_cc_cache_sizes[desired_sz_index] < desired_sz) {
desired_sz_index += 1;
assert(desired_sz_index < MAX_CC_CACHE_INDEX, "invariant");
}
assert(desired_sz_index < MAX_CC_CACHE_INDEX, "invariant");
// If the desired_sz value is between two sizes then
// _cc_cache_sizes[desired_sz_index-1] < desired_sz <= _cc_cache_sizes[desired_sz_index]
// we will start with the lower size in the optimistic expectation that
// we will not need to expand up. Note desired_sz_index could also be 0.
if (desired_sz_index > 0 &&
_cc_cache_sizes[desired_sz_index] > desired_sz) {
desired_sz_index -= 1;
}
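// Illustrative example (hypothetical numbers): with a 1 GB heap and
// 512-byte cards, _max_cards would be about 2M (2097152). If
// InitialCacheFraction were 64, desired_sz would be 32768; the loop above
// stops at _cc_cache_sizes[1] == 32771 and, since 32771 > 32768 and the
// index is greater than 0, we back off and start with
// _cc_cache_sizes[0] == 16381.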
_cache_size_index = MAX2(0, (_cache_size_index - 1));
int initial_size = _cc_cache_sizes[_cache_size_index];
if (initial_size < 0) initial_size = _max_n_card_counts;
// Make sure we don't go bigger than we will ever need
_n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts);
_card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
_card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
if (!expand_card_count_cache(desired_sz_index)) {
// Allocation was unsuccessful - exit
vm_exit_during_initialization("Could not reserve enough space for card count cache");
}
assert(_n_card_counts > 0, "post-condition");
assert(_cache_size_index == desired_sz_index, "post-condition");
Copy::fill_to_bytes(&_card_counts[0],
_n_card_counts * sizeof(CardCountCacheEntry));
......@@ -163,10 +188,13 @@ void ConcurrentG1Refine::reinitialize_threads() {
ConcurrentG1Refine::~ConcurrentG1Refine() {
if (G1ConcRSLogCacheSize > 0) {
// Please see the comment in allocate_card_count_cache
// for why we call os::malloc() and os::free() directly.
assert(_card_counts != NULL, "Logic");
FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
os::free(_card_counts);
assert(_card_epochs != NULL, "Logic");
FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
os::free(_card_epochs);
assert(_hot_cache != NULL, "Logic");
FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
}
......@@ -382,29 +410,93 @@ void ConcurrentG1Refine::clean_up_cache(int worker_i,
}
}
void ConcurrentG1Refine::expand_card_count_cache() {
// The arrays used to hold the card counts and the epochs must have
// a 1:1 correspondence. Hence they are allocated and freed together.
// Returns true if the allocations of both the counts and epochs
// were successful; false otherwise.
bool ConcurrentG1Refine::allocate_card_count_cache(size_t n,
CardCountCacheEntry** counts,
CardEpochCacheEntry** epochs) {
// We call the allocation/free routines directly for the counts
// and epochs arrays. The NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY
// macros call AllocateHeap and FreeHeap respectively.
// AllocateHeap will call vm_exit_out_of_memory in the event
// of an allocation failure and abort the JVM. With the
// _counts/epochs arrays we only need to abort the JVM if the
// initial allocation of these arrays fails.
//
// Additionally AllocateHeap/FreeHeap do some tracing of
// allocate/free calls so calling one without calling the
// other can cause inconsistencies in the tracing. So we
// call neither.
assert(*counts == NULL, "out param");
assert(*epochs == NULL, "out param");
size_t counts_size = n * sizeof(CardCountCacheEntry);
size_t epochs_size = n * sizeof(CardEpochCacheEntry);
*counts = (CardCountCacheEntry*) os::malloc(counts_size);
if (*counts == NULL) {
// allocation was unsuccessful
return false;
}
*epochs = (CardEpochCacheEntry*) os::malloc(epochs_size);
if (*epochs == NULL) {
// allocation was unsuccessful - free counts array
assert(*counts != NULL, "must be");
os::free(*counts);
*counts = NULL;
return false;
}
// We successfully allocated both counts and epochs
return true;
}
// Returns true if the card counts/epochs cache was
// successfully expanded; false otherwise.
bool ConcurrentG1Refine::expand_card_count_cache(int cache_size_idx) {
// Can we expand the card count and epoch tables?
if (_n_card_counts < _max_n_card_counts) {
int new_idx = _cache_size_index+1;
int new_size = _cc_cache_sizes[new_idx];
if (new_size < 0) new_size = _max_n_card_counts;
assert(cache_size_idx >= 0 && cache_size_idx < MAX_CC_CACHE_INDEX, "oob");
size_t cache_size = _cc_cache_sizes[cache_size_idx];
// Make sure we don't go bigger than we will ever need
new_size = MIN2((unsigned) new_size, _max_n_card_counts);
// Expand the card count and card epoch tables
if (new_size > (int)_n_card_counts) {
// We can just free and allocate a new array as we're
// not interested in preserving the contents
assert(_card_counts != NULL, "Logic!");
assert(_card_epochs != NULL, "Logic!");
FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
_n_card_counts = new_size;
_card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
_card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
_cache_size_index = new_idx;
cache_size = MIN2(cache_size, _max_n_card_counts);
// Should we expand the card count and card epoch tables?
if (cache_size > _n_card_counts) {
// We have been asked to allocate new, larger, arrays for
// the card counts and the epochs. Attempt the allocation
// of both before we free the existing arrays in case
// the allocation is unsuccessful...
CardCountCacheEntry* counts = NULL;
CardEpochCacheEntry* epochs = NULL;
if (allocate_card_count_cache(cache_size, &counts, &epochs)) {
// Allocation was successful.
// We can just free the old arrays; we're
// not interested in preserving the contents
if (_card_counts != NULL) os::free(_card_counts);
if (_card_epochs != NULL) os::free(_card_epochs);
// Cache the size of the arrays and the index that got us there.
_n_card_counts = cache_size;
_cache_size_index = cache_size_idx;
_card_counts = counts;
_card_epochs = epochs;
// We successfully allocated/expanded the caches.
return true;
}
}
}
// We did not successfully expand the caches.
return false;
}
void ConcurrentG1Refine::clear_and_record_card_counts() {
......@@ -415,10 +507,16 @@ void ConcurrentG1Refine::clear_and_record_card_counts() {
#endif
if (_expand_card_counts) {
expand_card_count_cache();
int new_idx = _cache_size_index + 1;
if (expand_card_count_cache(new_idx)) {
// Allocation was successful and _n_card_counts has
// been updated to the new size. We only need to clear
// the epochs so we don't read a bogus epoch value
// when inserting a card into the hot card cache.
Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
}
_expand_card_counts = false;
// Only need to clear the epochs.
Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
}
int this_epoch = (int) _n_periods;
......
/*
* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -94,7 +94,7 @@ class ConcurrentG1Refine: public CHeapObj {
} CardEpochCacheEntry;
julong make_epoch_entry(unsigned int card_num, unsigned int epoch) {
assert(0 <= card_num && card_num < _max_n_card_counts, "Bounds");
assert(0 <= card_num && card_num < _max_cards, "Bounds");
assert(0 <= epoch && epoch <= _n_periods, "must be");
return ((julong) card_num << card_num_shift) | epoch;
......@@ -117,15 +117,24 @@ class ConcurrentG1Refine: public CHeapObj {
CardEpochCacheEntry* _card_epochs;
// The current number of buckets in the card count cache
unsigned _n_card_counts;
size_t _n_card_counts;
// The max number of buckets required for the number of
// cards for the entire reserved heap
unsigned _max_n_card_counts;
// The number of cards for the entire reserved heap
size_t _max_cards;
// The max number of buckets for the card counts and epochs caches.
// This is the maximum that the counts and epochs will grow to.
// It is specified as a fraction or percentage of _max_cards using
// G1MaxHotCardCountSizePercent.
size_t _max_n_card_counts;
// Possible sizes of the cache: odd primes that roughly double in size.
// (See jvmtiTagMap.cpp).
static int _cc_cache_sizes[];
enum {
MAX_CC_CACHE_INDEX = 15 // maximum index into the cache size array.
};
static size_t _cc_cache_sizes[MAX_CC_CACHE_INDEX];
// The index in _cc_cache_sizes corresponding to the size of
// _card_counts.
......@@ -147,11 +156,22 @@ class ConcurrentG1Refine: public CHeapObj {
CardTableModRefBS* _ct_bs;
G1CollectedHeap* _g1h;
// Expands the array that holds the card counts to the next size up
void expand_card_count_cache();
// Helper routine for expand_card_count_cache().
// The arrays used to hold the card counts and the epochs must have
// a 1:1 correspondence. Hence they are allocated and freed together.
// Returns true if the allocations of both the counts and epochs
// were successful; false otherwise.
bool allocate_card_count_cache(size_t n,
CardCountCacheEntry** counts,
CardEpochCacheEntry** epochs);
// Expands the arrays that hold the card counts and epochs
// to the cache size at index. Returns true if the expansion/
// allocation was successful; false otherwise.
bool expand_card_count_cache(int index);
// hash a given key (index of card_ptr) with the specified size
static unsigned int hash(size_t key, int size) {
static unsigned int hash(size_t key, size_t size) {
return (unsigned int) key % size;
}
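// For illustration: a card whose index within the reserved heap is, say,
// 1234567 hashes to bucket 1234567 % 16381 == 5992 with the smallest
// table, and to a different bucket (1234567 % 32771 == 22040) once the
// cache grows to the next prime. Using odd primes for the table sizes
// keeps the modulo reasonably well distributed even when card indices
// arrive with a regular stride.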
......
......@@ -1204,7 +1204,6 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
g1p->record_concurrent_mark_remark_end();
}
#define CARD_BM_TEST_MODE 0
class CalcLiveObjectsClosure: public HeapRegionClosure {
......@@ -1726,6 +1725,11 @@ void ConcurrentMark::cleanup() {
}
_total_counting_time += this_final_counting_time;
if (G1PrintRegionLivenessInfo) {
G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
_g1h->heap_region_iterate(&cl);
}
// Install newly created mark bitMap as "prev".
swapMarkBitMaps();
......@@ -3199,8 +3203,12 @@ public:
CMTask* task)
: _g1h(g1h), _cm(cm), _task(task)
{
_ref_processor = g1h->ref_processor();
assert(_ref_processor != NULL, "should not be NULL");
assert(_ref_processor == NULL, "should be initialized to NULL");
if (G1UseConcMarkReferenceProcessing) {
_ref_processor = g1h->ref_processor();
assert(_ref_processor != NULL, "should not be NULL");
}
}
};
......@@ -4423,3 +4431,175 @@ CMTask::CMTask(int task_id,
_marking_step_diffs_ms.add(0.5);
}
// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.
// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"
#define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64
// For per-region info
#define G1PPRL_TYPE_FORMAT " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"
// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
: _out(out),
_total_used_bytes(0), _total_capacity_bytes(0),
_total_prev_live_bytes(0), _total_next_live_bytes(0),
_hum_used_bytes(0), _hum_capacity_bytes(0),
_hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
MemRegion g1_committed = g1h->g1_committed();
MemRegion g1_reserved = g1h->g1_reserved();
double now = os::elapsedTime();
// Print the header of the output.
_out->cr();
_out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
_out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
G1PPRL_SUM_ADDR_FORMAT("committed")
G1PPRL_SUM_ADDR_FORMAT("reserved")
G1PPRL_SUM_BYTE_FORMAT("region-size"),
g1_committed.start(), g1_committed.end(),
g1_reserved.start(), g1_reserved.end(),
HeapRegion::GrainBytes);
_out->print_cr(G1PPRL_LINE_PREFIX);
_out->print_cr(G1PPRL_LINE_PREFIX
G1PPRL_TYPE_H_FORMAT
G1PPRL_ADDR_BASE_H_FORMAT
G1PPRL_BYTE_H_FORMAT
G1PPRL_BYTE_H_FORMAT
G1PPRL_BYTE_H_FORMAT
G1PPRL_DOUBLE_H_FORMAT,
"type", "address-range",
"used", "prev-live", "next-live", "gc-eff");
}
// It takes as a parameter a reference to one of the _hum_* fields,
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
size_t bytes = 0;
// The > 0 check is to deal with the prev and next live bytes which
// could be 0.
if (*hum_bytes > 0) {
bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
*hum_bytes -= bytes;
}
return bytes;
}
// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
size_t* capacity_bytes,
size_t* prev_live_bytes,
size_t* next_live_bytes) {
assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
*used_bytes = get_hum_bytes(&_hum_used_bytes);
*capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
*prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
*next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}
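// Worked example (hypothetical sizes): with HeapRegion::GrainBytes at 1 MB
// and a humongous object occupying 2.5 MB, the "starts humongous" region
// seeds _hum_used_bytes with the full 2.5 MB; the three regions of the
// series are then reported with 1 MB, 1 MB and 0.5 MB of used space
// respectively, each call above subtracting MIN2(GrainBytes, remaining)
// until the field reaches 0.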
bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
const char* type = "";
HeapWord* bottom = r->bottom();
HeapWord* end = r->end();
size_t capacity_bytes = r->capacity();
size_t used_bytes = r->used();
size_t prev_live_bytes = r->live_bytes();
size_t next_live_bytes = r->next_live_bytes();
double gc_eff = r->gc_efficiency();
if (r->used() == 0) {
type = "FREE";
} else if (r->is_survivor()) {
type = "SURV";
} else if (r->is_young()) {
type = "EDEN";
} else if (r->startsHumongous()) {
type = "HUMS";
assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
_hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
"they should have been zeroed after the last time we used them");
// Set up the _hum_* fields.
_hum_capacity_bytes = capacity_bytes;
_hum_used_bytes = used_bytes;
_hum_prev_live_bytes = prev_live_bytes;
_hum_next_live_bytes = next_live_bytes;
get_hum_bytes(&used_bytes, &capacity_bytes,
&prev_live_bytes, &next_live_bytes);
end = bottom + HeapRegion::GrainWords;
} else if (r->continuesHumongous()) {
type = "HUMC";
get_hum_bytes(&used_bytes, &capacity_bytes,
&prev_live_bytes, &next_live_bytes);
assert(end == bottom + HeapRegion::GrainWords, "invariant");
} else {
type = "OLD";
}
_total_used_bytes += used_bytes;
_total_capacity_bytes += capacity_bytes;
_total_prev_live_bytes += prev_live_bytes;
_total_next_live_bytes += next_live_bytes;
// Print a line for this particular region.
_out->print_cr(G1PPRL_LINE_PREFIX
G1PPRL_TYPE_FORMAT
G1PPRL_ADDR_BASE_FORMAT
G1PPRL_BYTE_FORMAT
G1PPRL_BYTE_FORMAT
G1PPRL_BYTE_FORMAT
G1PPRL_DOUBLE_FORMAT,
type, bottom, end,
used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
return false;
}
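// With the format macros above, a per-region line in the log looks
// roughly like this (addresses and sizes are made up):
//
//   ###  OLD 0x00000000f0000000-0x00000000f0100000   1048576    917504    524288            0.8
//
// i.e. the "###" prefix, the region type, its address range, used bytes,
// prev/next live bytes and the GC efficiency estimate.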
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
// Print the footer of the output.
_out->print_cr(G1PPRL_LINE_PREFIX);
_out->print_cr(G1PPRL_LINE_PREFIX
" SUMMARY"
G1PPRL_SUM_MB_FORMAT("capacity")
G1PPRL_SUM_MB_PERC_FORMAT("used")
G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
bytes_to_mb(_total_capacity_bytes),
bytes_to_mb(_total_used_bytes),
perc(_total_used_bytes, _total_capacity_bytes),
bytes_to_mb(_total_prev_live_bytes),
perc(_total_prev_live_bytes, _total_capacity_bytes),
bytes_to_mb(_total_next_live_bytes),
perc(_total_next_live_bytes, _total_capacity_bytes));
_out->cr();
}
......@@ -1149,4 +1149,54 @@ public:
#endif // _MARKING_STATS_
};
// Class that's used to print out per-region liveness
// information. It's currently used at the end of marking and also
// after we sort the old regions at the end of the cleanup operation.
class G1PrintRegionLivenessInfoClosure: public HeapRegionClosure {
private:
outputStream* _out;
// Accumulators for these values.
size_t _total_used_bytes;
size_t _total_capacity_bytes;
size_t _total_prev_live_bytes;
size_t _total_next_live_bytes;
// These are set up when we come across a "starts humongous" region
// (as this is where most of this information is stored, not in the
// subsequent "continues humongous" regions). After that, for every
// region in a given humongous region series we deduce the right
// values for it by simply subtracting the appropriate amount from
// these fields. All these values should reach 0 after we've visited
// the last region in the series.
size_t _hum_used_bytes;
size_t _hum_capacity_bytes;
size_t _hum_prev_live_bytes;
size_t _hum_next_live_bytes;
static double perc(size_t val, size_t total) {
if (total == 0) {
return 0.0;
} else {
return 100.0 * ((double) val / (double) total);
}
}
static double bytes_to_mb(size_t val) {
return (double) val / (double) M;
}
// See the .cpp file.
size_t get_hum_bytes(size_t* hum_bytes);
void get_hum_bytes(size_t* used_bytes, size_t* capacity_bytes,
size_t* prev_live_bytes, size_t* next_live_bytes);
public:
// The header and footer are printed in the constructor and
// destructor respectively.
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name);
virtual bool doHeapRegion(HeapRegion* r);
~G1PrintRegionLivenessInfoClosure();
};
#endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
/*
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "gc_implementation/g1/g1AllocRegion.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
G1CollectedHeap* G1AllocRegion::_g1h = NULL;
HeapRegion* G1AllocRegion::_dummy_region = NULL;
void G1AllocRegion::setup(G1CollectedHeap* g1h, HeapRegion* dummy_region) {
assert(_dummy_region == NULL, "should be set once");
assert(dummy_region != NULL, "pre-condition");
assert(dummy_region->free() == 0, "pre-condition");
// Make sure that any allocation attempt on this region will fail
// and will not trigger any asserts.
assert(allocate(dummy_region, 1, false) == NULL, "should fail");
assert(par_allocate(dummy_region, 1, false) == NULL, "should fail");
assert(allocate(dummy_region, 1, true) == NULL, "should fail");
assert(par_allocate(dummy_region, 1, true) == NULL, "should fail");
_g1h = g1h;
_dummy_region = dummy_region;
}
void G1AllocRegion::fill_up_remaining_space(HeapRegion* alloc_region,
bool bot_updates) {
assert(alloc_region != NULL && alloc_region != _dummy_region,
"pre-condition");
// Other threads might still be trying to allocate using a CAS out
// of the region we are trying to retire, as they can do so without
// holding the lock. So, we first have to make sure that no one else
// can allocate out of it by doing a maximal allocation. Even if our
// CAS attempt fails a few times, we'll succeed sooner or later
// given that failed CAS attempts mean that the region is getting
// close to being full.
size_t free_word_size = alloc_region->free() / HeapWordSize;
// This is the minimum free chunk we can turn into a dummy
// object. If the free space falls below this, then no one can
// allocate in this region anyway (all allocation requests will be
// of a size larger than this) so we won't have to perform the dummy
// allocation.
size_t min_word_size_to_fill = CollectedHeap::min_fill_size();
while (free_word_size >= min_word_size_to_fill) {
HeapWord* dummy = par_allocate(alloc_region, free_word_size, bot_updates);
if (dummy != NULL) {
// If the allocation was successful we should fill in the space.
CollectedHeap::fill_with_object(dummy, free_word_size);
alloc_region->set_pre_dummy_top(dummy);
break;
}
free_word_size = alloc_region->free() / HeapWordSize;
// It's also possible that someone else beats us to the
// allocation and they fill up the region. In that case, we can
// just get out of the loop.
}
assert(alloc_region->free() / HeapWordSize < min_word_size_to_fill,
"post-condition");
}
void G1AllocRegion::retire(bool fill_up) {
assert(_alloc_region != NULL, ar_ext_msg(this, "not initialized properly"));
trace("retiring");
HeapRegion* alloc_region = _alloc_region;
if (alloc_region != _dummy_region) {
// We never have to check whether the active region is empty or not,
// and potentially free it if it is, given that it's guaranteed that
// it will never be empty.
assert(!alloc_region->is_empty(),
ar_ext_msg(this, "the alloc region should never be empty"));
if (fill_up) {
fill_up_remaining_space(alloc_region, _bot_updates);
}
assert(alloc_region->used() >= _used_bytes_before,
ar_ext_msg(this, "invariant"));
size_t allocated_bytes = alloc_region->used() - _used_bytes_before;
retire_region(alloc_region, allocated_bytes);
_used_bytes_before = 0;
_alloc_region = _dummy_region;
}
trace("retired");
}
HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size,
bool force) {
assert(_alloc_region == _dummy_region, ar_ext_msg(this, "pre-condition"));
assert(_used_bytes_before == 0, ar_ext_msg(this, "pre-condition"));
trace("attempting region allocation");
HeapRegion* new_alloc_region = allocate_new_region(word_size, force);
if (new_alloc_region != NULL) {
new_alloc_region->reset_pre_dummy_top();
// Need to do this before the allocation
_used_bytes_before = new_alloc_region->used();
HeapWord* result = allocate(new_alloc_region, word_size, _bot_updates);
assert(result != NULL, ar_ext_msg(this, "the allocation should have succeeded"));
OrderAccess::storestore();
// Note that we first perform the allocation and then we store the
// region in _alloc_region. This is the reason why an active region
// can never be empty.
_alloc_region = new_alloc_region;
trace("region allocation successful");
return result;
} else {
trace("region allocation failed");
return NULL;
}
ShouldNotReachHere();
}
void G1AllocRegion::fill_in_ext_msg(ar_ext_msg* msg, const char* message) {
msg->append("[%s] %s b: %s r: "PTR_FORMAT" u: "SIZE_FORMAT,
_name, message, BOOL_TO_STR(_bot_updates),
_alloc_region, _used_bytes_before);
}
void G1AllocRegion::init() {
trace("initializing");
assert(_alloc_region == NULL && _used_bytes_before == 0,
ar_ext_msg(this, "pre-condition"));
assert(_dummy_region != NULL, "should have been set");
_alloc_region = _dummy_region;
trace("initialized");
}
HeapRegion* G1AllocRegion::release() {
trace("releasing");
HeapRegion* alloc_region = _alloc_region;
retire(false /* fill_up */);
assert(_alloc_region == _dummy_region, "post-condition of retire()");
_alloc_region = NULL;
trace("released");
return (alloc_region == _dummy_region) ? NULL : alloc_region;
}
#if G1_ALLOC_REGION_TRACING
void G1AllocRegion::trace(const char* str, size_t word_size, HeapWord* result) {
// All the calls to trace that set either just the size or the size
// and the result are considered part of level 2 tracing and are
// skipped during level 1 tracing.
if ((word_size == 0 && result == NULL) || (G1_ALLOC_REGION_TRACING > 1)) {
const size_t buffer_length = 128;
char hr_buffer[buffer_length];
char rest_buffer[buffer_length];
HeapRegion* alloc_region = _alloc_region;
if (alloc_region == NULL) {
jio_snprintf(hr_buffer, buffer_length, "NULL");
} else if (alloc_region == _dummy_region) {
jio_snprintf(hr_buffer, buffer_length, "DUMMY");
} else {
jio_snprintf(hr_buffer, buffer_length,
HR_FORMAT, HR_FORMAT_PARAMS(alloc_region));
}
if (G1_ALLOC_REGION_TRACING > 1) {
if (result != NULL) {
jio_snprintf(rest_buffer, buffer_length, SIZE_FORMAT" "PTR_FORMAT,
word_size, result);
} else if (word_size != 0) {
jio_snprintf(rest_buffer, buffer_length, SIZE_FORMAT, word_size);
} else {
jio_snprintf(rest_buffer, buffer_length, "");
}
} else {
jio_snprintf(rest_buffer, buffer_length, "");
}
tty->print_cr("[%s] %s : %s %s", _name, hr_buffer, str, rest_buffer);
}
}
#endif // G1_ALLOC_REGION_TRACING
G1AllocRegion::G1AllocRegion(const char* name,
bool bot_updates)
: _name(name), _bot_updates(bot_updates),
_alloc_region(NULL), _used_bytes_before(0) { }
/*
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_HPP
#define SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_HPP
#include "gc_implementation/g1/heapRegion.hpp"
class G1CollectedHeap;
// 0 -> no tracing, 1 -> basic tracing, 2 -> basic + allocation tracing
#define G1_ALLOC_REGION_TRACING 0
class ar_ext_msg;
// A class that holds a region that is active in satisfying allocation
// requests, potentially issued in parallel. When the active region is
// full it will be retired and replaced with a new one. The
// implementation assumes that fast-path allocations will be lock-free
// and a lock will need to be taken when the active region needs to be
// replaced.
class G1AllocRegion VALUE_OBJ_CLASS_SPEC {
friend class ar_ext_msg;
private:
// The active allocating region we are currently allocating out
// of. The invariant is that if this object is initialized (i.e.,
// init() has been called and release() has not) then _alloc_region
// is either an active allocating region or the dummy region (i.e.,
// it can never be NULL) and this object can be used to satisfy
// allocation requests. If this object is not initialized
// (i.e. init() has not been called or release() has been called)
// then _alloc_region is NULL and this object should not be used to
// satisfy allocation requests (it was done this way to force the
// correct use of init() and release()).
HeapRegion* _alloc_region;
// When we set up a new active region we save its used bytes in this
// field so that, when we retire it, we can calculate how much space
// we allocated in it.
size_t _used_bytes_before;
// Specifies whether the allocate calls will do BOT updates or not.
bool _bot_updates;
// Useful for debugging and tracing.
const char* _name;
// A dummy region (i.e., it's been allocated specially for this
// purpose and it is not part of the heap) that is full (i.e., top()
// == end()). When we don't have a valid active region we make
// _alloc_region point to this. This allows us to skip checking
// whether the _alloc_region is NULL or not.
static HeapRegion* _dummy_region;
// Some of the methods below take a bot_updates parameter. Its value
// should be the same as the _bot_updates field. The idea is that
// the parameter will be a constant for a particular alloc region
// and, given that these methods will be hopefully inlined, the
// compiler should compile out the test.
// Perform a non-MT-safe allocation out of the given region.
static inline HeapWord* allocate(HeapRegion* alloc_region,
size_t word_size,
bool bot_updates);
// Perform a MT-safe allocation out of the given region.
static inline HeapWord* par_allocate(HeapRegion* alloc_region,
size_t word_size,
bool bot_updates);
// Ensure that the region passed as a parameter has been filled up
// so that no one else can allocate out of it any more.
static void fill_up_remaining_space(HeapRegion* alloc_region,
bool bot_updates);
// Retire the active allocating region. If fill_up is true then make
// sure that the region is full before we retire it so that no one
// else can allocate out of it.
void retire(bool fill_up);
// Allocate a new active region and use it to perform a word_size
// allocation. The force parameter will be passed on to
// G1CollectedHeap::allocate_new_alloc_region() and tells it to try
// to allocate a new region even if the max has been reached.
HeapWord* new_alloc_region_and_allocate(size_t word_size, bool force);
void fill_in_ext_msg(ar_ext_msg* msg, const char* message);
protected:
// For convenience as subclasses use it.
static G1CollectedHeap* _g1h;
virtual HeapRegion* allocate_new_region(size_t word_size, bool force) = 0;
virtual void retire_region(HeapRegion* alloc_region,
size_t allocated_bytes) = 0;
G1AllocRegion(const char* name, bool bot_updates);
public:
static void setup(G1CollectedHeap* g1h, HeapRegion* dummy_region);
HeapRegion* get() const {
// Make sure that the dummy region does not escape this class.
return (_alloc_region == _dummy_region) ? NULL : _alloc_region;
}
// The following two are the building blocks for the allocation method.
// First-level allocation: Should be called without holding a
// lock. It will try to allocate lock-free out of the active region,
// or return NULL if it was unable to.
inline HeapWord* attempt_allocation(size_t word_size, bool bot_updates);
// Second-level allocation: Should be called while holding a
// lock. It will try to first allocate lock-free out of the active
// region or, if it's unable to, it will try to replace the active
// alloc region with a new one. We require that the caller takes the
// appropriate lock before calling this so that it is easier to make
// it conform to its locking protocol.
inline HeapWord* attempt_allocation_locked(size_t word_size,
bool bot_updates);
// Should be called to allocate a new region even if the max of this
// type of regions has been reached. Should only be called if other
// allocation attempts have failed and we are not holding a valid
// active region.
inline HeapWord* attempt_allocation_force(size_t word_size,
bool bot_updates);
// Should be called before we start using this object.
void init();
// Should be called when we want to release the active region which
// is returned after it's been retired.
HeapRegion* release();
#if G1_ALLOC_REGION_TRACING
void trace(const char* str, size_t word_size = 0, HeapWord* result = NULL);
#else // G1_ALLOC_REGION_TRACING
void trace(const char* str, size_t word_size = 0, HeapWord* result = NULL) { }
#endif // G1_ALLOC_REGION_TRACING
};
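// A sketch of the intended use (the surrounding locking code is
// hypothetical; the two attempt_allocation* calls are the ones declared
// above and used by G1CollectedHeap):
//
//   HeapWord* result = alloc_region.attempt_allocation(word_size,
//                                                      false /* bot_updates */);
//   if (result == NULL) {
//     MutexLockerEx x(Heap_lock);
//     result = alloc_region.attempt_allocation_locked(word_size,
//                                                     false /* bot_updates */);
//   }
//
// Callers try the lock-free fast path first and only take the lock when
// the active region has to be retired and replaced.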
class ar_ext_msg : public err_msg {
public:
ar_ext_msg(G1AllocRegion* alloc_region, const char *message) : err_msg("") {
alloc_region->fill_in_ext_msg(this, message);
}
};
#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_HPP
/*
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_INLINE_HPP
#define SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_INLINE_HPP
#include "gc_implementation/g1/g1AllocRegion.hpp"
inline HeapWord* G1AllocRegion::allocate(HeapRegion* alloc_region,
size_t word_size,
bool bot_updates) {
assert(alloc_region != NULL, err_msg("pre-condition"));
if (!bot_updates) {
return alloc_region->allocate_no_bot_updates(word_size);
} else {
return alloc_region->allocate(word_size);
}
}
inline HeapWord* G1AllocRegion::par_allocate(HeapRegion* alloc_region,
size_t word_size,
bool bot_updates) {
assert(alloc_region != NULL, err_msg("pre-condition"));
assert(!alloc_region->is_empty(), err_msg("pre-condition"));
if (!bot_updates) {
return alloc_region->par_allocate_no_bot_updates(word_size);
} else {
return alloc_region->par_allocate(word_size);
}
}
inline HeapWord* G1AllocRegion::attempt_allocation(size_t word_size,
bool bot_updates) {
assert(bot_updates == _bot_updates, ar_ext_msg(this, "pre-condition"));
HeapRegion* alloc_region = _alloc_region;
assert(alloc_region != NULL, ar_ext_msg(this, "not initialized properly"));
HeapWord* result = par_allocate(alloc_region, word_size, bot_updates);
if (result != NULL) {
trace("alloc", word_size, result);
return result;
}
trace("alloc failed", word_size);
return NULL;
}
inline HeapWord* G1AllocRegion::attempt_allocation_locked(size_t word_size,
bool bot_updates) {
// First we have to redo the allocation, assuming we're holding the
// appropriate lock, in case another thread changed the region while
// we were waiting to get the lock.
HeapWord* result = attempt_allocation(word_size, bot_updates);
if (result != NULL) {
return result;
}
retire(true /* fill_up */);
result = new_alloc_region_and_allocate(word_size, false /* force */);
if (result != NULL) {
trace("alloc locked (second attempt)", word_size, result);
return result;
}
trace("alloc locked failed", word_size);
return NULL;
}
inline HeapWord* G1AllocRegion::attempt_allocation_force(size_t word_size,
bool bot_updates) {
assert(bot_updates == _bot_updates, ar_ext_msg(this, "pre-condition"));
assert(_alloc_region != NULL, ar_ext_msg(this, "not initialized properly"));
trace("forcing alloc");
HeapWord* result = new_alloc_region_and_allocate(word_size, true /* force */);
if (result != NULL) {
trace("alloc forced", word_size, result);
return result;
}
trace("alloc forced failed", word_size);
return NULL;
}
#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_INLINE_HPP
......@@ -28,6 +28,7 @@
#include "gc_implementation/g1/concurrentG1Refine.hpp"
#include "gc_implementation/g1/concurrentG1RefineThread.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1AllocRegion.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1MarkSweep.hpp"
......@@ -517,8 +518,7 @@ G1CollectedHeap::new_region_try_secondary_free_list() {
return NULL;
}
HeapRegion* G1CollectedHeap::new_region_work(size_t word_size,
bool do_expand) {
HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool do_expand) {
assert(!isHumongous(word_size) ||
word_size <= (size_t) HeapRegion::GrainWords,
"the only time we use this to allocate a humongous region is "
......@@ -566,7 +566,7 @@ HeapRegion* G1CollectedHeap::new_gc_alloc_region(int purpose,
size_t word_size) {
HeapRegion* alloc_region = NULL;
if (_gc_alloc_region_counts[purpose] < g1_policy()->max_regions(purpose)) {
alloc_region = new_region_work(word_size, true /* do_expand */);
alloc_region = new_region(word_size, true /* do_expand */);
if (purpose == GCAllocForSurvived && alloc_region != NULL) {
alloc_region->set_survivor();
}
......@@ -587,7 +587,7 @@ int G1CollectedHeap::humongous_obj_allocate_find_first(size_t num_regions,
// Only one region to allocate, no need to go through the slower
// path. The caller will attempt the expansion if this fails, so
// let's not try to expand here too.
HeapRegion* hr = new_region_work(word_size, false /* do_expand */);
HeapRegion* hr = new_region(word_size, false /* do_expand */);
if (hr != NULL) {
first = hr->hrs_index();
} else {
......@@ -788,508 +788,268 @@ HeapWord* G1CollectedHeap::humongous_obj_allocate(size_t word_size) {
return result;
}
void
G1CollectedHeap::retire_cur_alloc_region(HeapRegion* cur_alloc_region) {
// Other threads might still be trying to allocate using CASes out
// of the region we are retiring, as they can do so without holding
// the Heap_lock. So we first have to make sure that no one else can
// allocate in it by doing a maximal allocation. Even if our CAS
// attempt fails a few times, we'll succeed sooner or later given
// that a failed CAS attempt means that the region is getting close
// to being full (someone else succeeded in allocating into it).
size_t free_word_size = cur_alloc_region->free() / HeapWordSize;
// This is the minimum free chunk we can turn into a dummy
// object. If the free space falls below this, then no one can
// allocate in this region anyway (all allocation requests will be
// of a size larger than this) so we won't have to perform the dummy
// allocation.
size_t min_word_size_to_fill = CollectedHeap::min_fill_size();
while (free_word_size >= min_word_size_to_fill) {
HeapWord* dummy =
cur_alloc_region->par_allocate_no_bot_updates(free_word_size);
if (dummy != NULL) {
// If the allocation was successful we should fill in the space.
CollectedHeap::fill_with_object(dummy, free_word_size);
break;
}
free_word_size = cur_alloc_region->free() / HeapWordSize;
// It's also possible that someone else beats us to the
// allocation and they fill up the region. In that case, we can
// just get out of the loop
}
assert(cur_alloc_region->free() / HeapWordSize < min_word_size_to_fill,
"sanity");
retire_cur_alloc_region_common(cur_alloc_region);
assert(_cur_alloc_region == NULL, "post-condition");
}
// See the comment in the .hpp file about the locking protocol and
// assumptions of this method (and other related ones).
HeapWord*
G1CollectedHeap::replace_cur_alloc_region_and_allocate(size_t word_size,
bool at_safepoint,
bool do_dirtying,
bool can_expand) {
assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
assert(_cur_alloc_region == NULL,
"replace_cur_alloc_region_and_allocate() should only be called "
"after retiring the previous current alloc region");
assert(SafepointSynchronize::is_at_safepoint() == at_safepoint,
"at_safepoint and is_at_safepoint() should be a tautology");
assert(!can_expand || g1_policy()->can_expand_young_list(),
"we should not call this method with can_expand == true if "
"we are not allowed to expand the young gen");
if (can_expand || !g1_policy()->is_young_list_full()) {
HeapRegion* new_cur_alloc_region = new_alloc_region(word_size);
if (new_cur_alloc_region != NULL) {
assert(new_cur_alloc_region->is_empty(),
"the newly-allocated region should be empty, "
"as right now we only allocate new regions out of the free list");
g1_policy()->update_region_num(true /* next_is_young */);
set_region_short_lived_locked(new_cur_alloc_region);
assert(!new_cur_alloc_region->isHumongous(),
"Catch a regression of this bug.");
// We need to ensure that the stores to _cur_alloc_region and,
// subsequently, to top do not float above the setting of the
// young type.
OrderAccess::storestore();
// Now, perform the allocation out of the region we just
// allocated. Note that no one else can access that region at
// this point (as _cur_alloc_region has not been updated yet),
// so we can just go ahead and do the allocation without any
// atomics (and we expect this allocation attempt to
// succeed). Given that other threads can attempt an allocation
// with a CAS and without needing the Heap_lock, if we assigned
// the new region to _cur_alloc_region before first allocating
// into it other threads might have filled up the new region
// before we got a chance to do the allocation ourselves. In
// that case, we would have needed to retire the region, grab a
// new one, and go through all this again. Allocating out of the
// new region before assigning it to _cur_alloc_region avoids
// all this.
HeapWord* result =
new_cur_alloc_region->allocate_no_bot_updates(word_size);
assert(result != NULL, "we just allocate out of an empty region "
"so allocation should have been successful");
assert(is_in(result), "result should be in the heap");
// Now make sure that the store to _cur_alloc_region does not
// float above the store to top.
OrderAccess::storestore();
_cur_alloc_region = new_cur_alloc_region;
if (!at_safepoint) {
Heap_lock->unlock();
}
// do the dirtying, if necessary, after we release the Heap_lock
if (do_dirtying) {
dirty_young_block(result, word_size);
}
return result;
}
}
HeapWord* G1CollectedHeap::allocate_new_tlab(size_t word_size) {
assert_heap_not_locked_and_not_at_safepoint();
assert(!isHumongous(word_size), "we do not allow humongous TLABs");
assert(_cur_alloc_region == NULL, "we failed to allocate a new current "
"alloc region, it should still be NULL");
assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
return NULL;
unsigned int dummy_gc_count_before;
return attempt_allocation(word_size, &dummy_gc_count_before);
}
// See the comment in the .hpp file about the locking protocol and
// assumptions of this method (and other related ones).
HeapWord*
G1CollectedHeap::attempt_allocation_slow(size_t word_size) {
assert_heap_locked_and_not_at_safepoint();
assert(!isHumongous(word_size), "attempt_allocation_slow() should not be "
"used for humongous allocations");
// We should only reach here when we were unable to allocate
// otherwise. So, we should have no active current alloc region.
assert(_cur_alloc_region == NULL, "current alloc region should be NULL");
// We will loop while succeeded is false, which means that we tried
// to do a collection, but the VM op did not succeed. So, when we
// exit the loop, either one of the allocation attempts was
// successful, or we succeeded in doing the VM op but it was
// unable to allocate after the collection.
for (int try_count = 1; /* we'll return or break */; try_count += 1) {
bool succeeded = true;
// Every time we go round the loop we should be holding the Heap_lock.
assert_heap_locked();
if (GC_locker::is_active_and_needs_gc()) {
// We are locked out of GC because of the GC locker. We can
// allocate a new region only if we can expand the young gen.
if (g1_policy()->can_expand_young_list()) {
// Yes, we are allowed to expand the young gen. Let's try to
// allocate a new current alloc region.
HeapWord* result =
replace_cur_alloc_region_and_allocate(word_size,
false, /* at_safepoint */
true, /* do_dirtying */
true /* can_expand */);
if (result != NULL) {
assert_heap_not_locked();
return result;
}
}
// We could not expand the young gen further (or we could but we
// failed to allocate a new region). We'll stall until the GC
// locker forces a GC.
// If this thread is not in a jni critical section, we stall
// the requestor until the critical section has cleared and
// GC allowed. When the critical section clears, a GC is
// initiated by the last thread exiting the critical section; so
// we retry the allocation sequence from the beginning of the loop,
// rather than causing more, now probably unnecessary, GC attempts.
JavaThread* jthr = JavaThread::current();
assert(jthr != NULL, "sanity");
if (jthr->in_critical()) {
if (CheckJNICalls) {
fatal("Possible deadlock due to allocating while"
" in jni critical section");
}
// We are returning NULL so the protocol is that we're still
// holding the Heap_lock.
assert_heap_locked();
return NULL;
}
G1CollectedHeap::mem_allocate(size_t word_size,
bool is_noref,
bool is_tlab,
bool* gc_overhead_limit_was_exceeded) {
assert_heap_not_locked_and_not_at_safepoint();
assert(!is_tlab, "mem_allocate() this should not be called directly "
"to allocate TLABs");
Heap_lock->unlock();
GC_locker::stall_until_clear();
// Loop until the allocation is satisfied, or unsatisfied after GC.
for (int try_count = 1; /* we'll return */; try_count += 1) {
unsigned int gc_count_before;
// No need to relock the Heap_lock. We'll fall off to the code
// below the else-statement which assumes that we are not
// holding the Heap_lock.
HeapWord* result = NULL;
if (!isHumongous(word_size)) {
result = attempt_allocation(word_size, &gc_count_before);
} else {
// We are not locked out. So, let's try to do a GC. The VM op
// will retry the allocation before it completes.
// Read the GC count while holding the Heap_lock
unsigned int gc_count_before = SharedHeap::heap()->total_collections();
Heap_lock->unlock();
result = attempt_allocation_humongous(word_size, &gc_count_before);
}
if (result != NULL) {
return result;
}
HeapWord* result =
do_collection_pause(word_size, gc_count_before, &succeeded);
assert_heap_not_locked();
if (result != NULL) {
assert(succeeded, "the VM op should have succeeded");
// Create the garbage collection operation...
VM_G1CollectForAllocation op(gc_count_before, word_size);
// ...and get the VM thread to execute it.
VMThread::execute(&op);
if (op.prologue_succeeded() && op.pause_succeeded()) {
// If the operation was successful we'll return the result even
// if it is NULL. If the allocation attempt failed immediately
// after a Full GC, it's unlikely we'll be able to allocate now.
HeapWord* result = op.result();
if (result != NULL && !isHumongous(word_size)) {
// Allocations that take place on VM operations do not do any
// card dirtying and we have to do it here.
// card dirtying and we have to do it here. We only have to do
// this for non-humongous allocations, though.
dirty_young_block(result, word_size);
return result;
}
}
// Both paths that get us here from above unlock the Heap_lock.
assert_heap_not_locked();
// We can reach here when we were unsuccessful in doing a GC,
// because another thread beat us to it, or because we were locked
// out of GC due to the GC locker. In either case a new alloc
// region might be available so we will retry the allocation.
HeapWord* result = attempt_allocation(word_size);
if (result != NULL) {
assert_heap_not_locked();
return result;
}
// So far our attempts to allocate failed. The only time we'll go
// around the loop and try again is if we tried to do a GC and the
// VM op that we tried to schedule was not successful because
// another thread beat us to it. If that happened it's possible
// that by the time we grabbed the Heap_lock again and tried to
// allocate other threads filled up the young generation, which
// means that the allocation attempt after the GC also failed. So,
// it's worth trying to schedule another GC pause.
if (succeeded) {
break;
} else {
assert(op.result() == NULL,
"the result should be NULL if the VM op did not succeed");
}
// Give a warning if we seem to be looping forever.
if ((QueuedAllocationWarningCount > 0) &&
(try_count % QueuedAllocationWarningCount == 0)) {
warning("G1CollectedHeap::attempt_allocation_slow() "
"retries %d times", try_count);
warning("G1CollectedHeap::mem_allocate retries %d times", try_count);
}
}
assert_heap_locked();
ShouldNotReachHere();
return NULL;
}
// See the comment in the .hpp file about the locking protocol and
// assumptions of this method (and other related ones).
HeapWord*
G1CollectedHeap::attempt_allocation_humongous(size_t word_size,
bool at_safepoint) {
// This is the method that will allocate a humongous object. All
// allocation paths that attempt to allocate a humongous object
// should eventually reach here. Currently, the only paths are from
// mem_allocate() and attempt_allocation_at_safepoint().
assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
assert(isHumongous(word_size), "attempt_allocation_humongous() "
"should only be used for humongous allocations");
assert(SafepointSynchronize::is_at_safepoint() == at_safepoint,
"at_safepoint and is_at_safepoint() should be a tautology");
HeapWord* G1CollectedHeap::attempt_allocation_slow(size_t word_size,
unsigned int *gc_count_before_ret) {
// Make sure you read the note in attempt_allocation_humongous().
assert_heap_not_locked_and_not_at_safepoint();
assert(!isHumongous(word_size), "attempt_allocation_slow() should not "
"be called for humongous allocation requests");
// We should only get here after the first-level allocation attempt
// (attempt_allocation()) failed to allocate.
// We will loop until a) we manage to successfully perform the
// allocation or b) we successfully schedule a collection which
// fails to perform the allocation. b) is the only case when we'll
// return NULL.
HeapWord* result = NULL;
for (int try_count = 1; /* we'll return */; try_count += 1) {
bool should_try_gc;
unsigned int gc_count_before;
// We will loop while succeeded is false, which means that we tried
// to do a collection, but the VM op did not succeed. So, when we
// exit the loop, either one of the allocation attempts was
// successful, or we succeeded in doing the VM op but it was
// unable to allocate after the collection.
for (int try_count = 1; /* we'll return or break */; try_count += 1) {
bool succeeded = true;
// Given that humongous objects are not allocated in young
// regions, we'll first try to do the allocation without doing a
// collection hoping that there's enough space in the heap.
result = humongous_obj_allocate(word_size);
assert(_cur_alloc_region == NULL || !_cur_alloc_region->isHumongous(),
"catch a regression of this bug.");
if (result != NULL) {
if (!at_safepoint) {
// If we're not at a safepoint, unlock the Heap_lock.
Heap_lock->unlock();
{
MutexLockerEx x(Heap_lock);
result = _mutator_alloc_region.attempt_allocation_locked(word_size,
false /* bot_updates */);
if (result != NULL) {
return result;
}
return result;
}
// If we failed to allocate the humongous object, we should try to
// do a collection pause (if we're allowed) in case it reclaims
// enough space for the allocation to succeed after the pause.
if (!at_safepoint) {
// Read the GC count while holding the Heap_lock
unsigned int gc_count_before = SharedHeap::heap()->total_collections();
// If we reach here, attempt_allocation_locked() above failed to
// allocate a new region. So the mutator alloc region should be NULL.
assert(_mutator_alloc_region.get() == NULL, "only way to get here");
// If we're allowed to do a collection we're not at a
// safepoint, so it is safe to unlock the Heap_lock.
Heap_lock->unlock();
if (GC_locker::is_active_and_needs_gc()) {
if (g1_policy()->can_expand_young_list()) {
result = _mutator_alloc_region.attempt_allocation_force(word_size,
false /* bot_updates */);
if (result != NULL) {
return result;
}
}
should_try_gc = false;
} else {
// Read the GC count while still holding the Heap_lock.
gc_count_before = SharedHeap::heap()->total_collections();
should_try_gc = true;
}
}
if (should_try_gc) {
bool succeeded;
result = do_collection_pause(word_size, gc_count_before, &succeeded);
assert_heap_not_locked();
if (result != NULL) {
assert(succeeded, "the VM op should have succeeded");
assert(succeeded, "only way to get back a non-NULL result");
return result;
}
// If we get here, the VM operation either did not succeed
// (i.e., another thread beat us to it) or it succeeded but
// failed to allocate the object.
// If we're allowed to do a collection we're not at a
// safepoint, so it is safe to lock the Heap_lock.
Heap_lock->lock();
if (succeeded) {
// If we get here we successfully scheduled a collection which
// failed to allocate. No point in trying to allocate
// further. We'll just return NULL.
MutexLockerEx x(Heap_lock);
*gc_count_before_ret = SharedHeap::heap()->total_collections();
return NULL;
}
} else {
GC_locker::stall_until_clear();
}
assert(result == NULL, "otherwise we should have exited the loop earlier");
// So far our attempts to allocate failed. The only time we'll go
// around the loop and try again is if we tried to do a GC and the
// VM op that we tried to schedule was not successful because
// another thread beat us to it. That way it's possible that some
// space was freed up by the thread that successfully scheduled a
// GC. So it's worth trying to allocate again.
if (succeeded) {
break;
// We can reach here if we were unsuccessful in scheduling a
// collection (because another thread beat us to it) or if we were
// stalled due to the GC locker. In either case we should retry the
// allocation attempt in case another thread successfully
// performed a collection and reclaimed enough space. We do the
// first attempt (without holding the Heap_lock) here and the
// follow-on attempt will be at the start of the next loop
// iteration (after taking the Heap_lock).
result = _mutator_alloc_region.attempt_allocation(word_size,
false /* bot_updates */);
if (result != NULL) {
return result;
}
// Give a warning if we seem to be looping forever.
if ((QueuedAllocationWarningCount > 0) &&
(try_count % QueuedAllocationWarningCount == 0)) {
warning("G1CollectedHeap::attempt_allocation_humongous "
warning("G1CollectedHeap::attempt_allocation_slow() "
"retries %d times", try_count);
}
}
assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
return NULL;
}
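The control flow of attempt_allocation_slow() can be modelled outside HotSpot as: try under the heap lock, decide whether a GC may be scheduled (not if the GC locker is active), remember the collection count, and loop only if another thread won the race to schedule the pause. The sketch below is a simplified standalone model under those assumptions; FakeHeap and its members are invented stand-ins, not the real classes:

#include <cstddef>
#include <mutex>

struct FakeHeap {
  std::mutex   heap_lock;
  size_t       free_words;
  unsigned int total_collections;
  bool         gc_locker_active;

  // Any non-NULL value stands for a successful allocation in this model.
  void* allocate(size_t word_size) {
    if (word_size <= free_words) { free_words -= word_size; return this; }
    return NULL;
  }
  // Returns true if *we* scheduled the collection, false if another thread did.
  bool do_collection_pause(unsigned int gc_count_before) {
    std::lock_guard<std::mutex> g(heap_lock);
    if (gc_count_before != total_collections) return false;  // lost the race
    total_collections += 1;
    free_words += 1024;  // pretend the pause reclaimed space
    return true;
  }
};

void* attempt_allocation_slow_model(FakeHeap* h, size_t word_size) {
  for (;;) {
    bool should_try_gc;
    unsigned int gc_count_before = 0;
    {
      std::lock_guard<std::mutex> g(h->heap_lock);
      void* result = h->allocate(word_size);
      if (result != NULL) return result;
      should_try_gc = !h->gc_locker_active;
      if (should_try_gc) gc_count_before = h->total_collections;
    }
    if (should_try_gc) {
      bool succeeded = h->do_collection_pause(gc_count_before);
      if (succeeded) {
        std::lock_guard<std::mutex> g(h->heap_lock);
        void* result = h->allocate(word_size);
        if (result != NULL) return result;
        return NULL;  // we did the GC and still cannot allocate: give up
      }
      // Another thread scheduled the GC; fall through and retry the allocation.
    }
    // (A real implementation would stall here until the GC locker clears.)
  }
}

int main() {
  FakeHeap h;
  h.free_words = 8; h.total_collections = 0; h.gc_locker_active = false;
  return attempt_allocation_slow_model(&h, 100) != NULL ? 0 : 1;
}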
HeapWord* G1CollectedHeap::attempt_allocation_at_safepoint(size_t word_size,
bool expect_null_cur_alloc_region) {
assert_at_safepoint(true /* should_be_vm_thread */);
assert(_cur_alloc_region == NULL || !expect_null_cur_alloc_region,
err_msg("the current alloc region was unexpectedly found "
"to be non-NULL, cur alloc region: "PTR_FORMAT" "
"expect_null_cur_alloc_region: %d word_size: "SIZE_FORMAT,
_cur_alloc_region, expect_null_cur_alloc_region, word_size));
if (!isHumongous(word_size)) {
if (!expect_null_cur_alloc_region) {
HeapRegion* cur_alloc_region = _cur_alloc_region;
if (cur_alloc_region != NULL) {
// We are at a safepoint so no reason to use the MT-safe version.
HeapWord* result = cur_alloc_region->allocate_no_bot_updates(word_size);
if (result != NULL) {
assert(is_in(result), "result should be in the heap");
// We will not do any dirtying here. This is guaranteed to be
// called during a safepoint and the thread that scheduled the
// pause will do the dirtying if we return a non-NULL result.
return result;
}
retire_cur_alloc_region_common(cur_alloc_region);
}
}
assert(_cur_alloc_region == NULL,
"at this point we should have no cur alloc region");
return replace_cur_alloc_region_and_allocate(word_size,
true, /* at_safepoint */
false /* do_dirtying */,
false /* can_expand */);
} else {
return attempt_allocation_humongous(word_size,
true /* at_safepoint */);
}
ShouldNotReachHere();
}
HeapWord* G1CollectedHeap::allocate_new_tlab(size_t word_size) {
assert_heap_not_locked_and_not_at_safepoint();
assert(!isHumongous(word_size), "we do not allow TLABs of humongous size");
// First attempt: Try allocating out of the current alloc region
// using a CAS. If that fails, take the Heap_lock and retry the
// allocation, potentially replacing the current alloc region.
HeapWord* result = attempt_allocation(word_size);
if (result != NULL) {
assert_heap_not_locked();
return result;
}
// Second attempt: Go to the slower path where we might try to
// schedule a collection.
result = attempt_allocation_slow(word_size);
if (result != NULL) {
assert_heap_not_locked();
return result;
}
assert_heap_locked();
// Need to unlock the Heap_lock before returning.
Heap_lock->unlock();
return NULL;
}
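A minimal standalone sketch of the two-level scheme described in the comments above, assuming a fixed-size buffer plays the role of the current alloc region: a lock-free CAS bump of top first, and a locked retry that replaces the region when it is full (real region retirement is far more careful than this):

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <mutex>

class BumpRegion {
  uint8_t             _buf[4096];
  std::atomic<size_t> _top;             // offset of the next free byte
 public:
  BumpRegion() : _top(0) {}
  void* par_allocate(size_t bytes) {    // CAS fast path, no lock needed
    size_t old_top = _top.load();
    do {
      if (old_top + bytes > sizeof(_buf)) return NULL;  // region is full
    } while (!_top.compare_exchange_weak(old_top, old_top + bytes));
    return _buf + old_top;
  }
};

class TwoLevelAllocator {
  std::mutex  _heap_lock;
  BumpRegion* _cur;
 public:
  TwoLevelAllocator() : _cur(new BumpRegion()) {}
  void* allocate(size_t bytes) {
    void* result = _cur->par_allocate(bytes);      // first attempt: CAS only
    if (result != NULL) return result;
    std::lock_guard<std::mutex> g(_heap_lock);     // second attempt: locked
    result = _cur->par_allocate(bytes);            // somebody may have refilled
    if (result != NULL) return result;
    delete _cur;                                   // retire and replace the region
    _cur = new BumpRegion();                       // (ignores concurrent readers)
    return _cur->par_allocate(bytes);
  }
};

int main() {
  TwoLevelAllocator a;
  return a.allocate(64) != NULL ? 0 : 1;
}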
HeapWord*
G1CollectedHeap::mem_allocate(size_t word_size,
bool is_noref,
bool is_tlab,
bool* gc_overhead_limit_was_exceeded) {
HeapWord* G1CollectedHeap::attempt_allocation_humongous(size_t word_size,
unsigned int * gc_count_before_ret) {
// The structure of this method has a lot of similarities to
// attempt_allocation_slow(). The reason these two were not merged
// into a single one is that such a method would require several "if
// allocation is not humongous do this, otherwise do that"
// conditional paths which would obscure its flow. In fact, an early
// version of this code did use a unified method which was harder to
// follow and, as a result, it had subtle bugs that were hard to
// track down. So keeping these two methods separate allows each to
// be more readable. It will be good to keep these two in sync as
// much as possible.
assert_heap_not_locked_and_not_at_safepoint();
assert(!is_tlab, "mem_allocate() this should not be called directly "
"to allocate TLABs");
assert(isHumongous(word_size), "attempt_allocation_humongous() "
"should only be called for humongous allocations");
// Loop until the allocation is satisfied,
// or unsatisfied after GC.
// We will loop until a) we manage to successfully perform the
// allocation or b) we successfully schedule a collection which
// fails to perform the allocation. b) is the only case when we'll
// return NULL.
HeapWord* result = NULL;
for (int try_count = 1; /* we'll return */; try_count += 1) {
bool should_try_gc;
unsigned int gc_count_before;
{
if (!isHumongous(word_size)) {
// First attempt: Try allocating out of the current alloc region
// using a CAS. If that fails, take the Heap_lock and retry the
// allocation, potentially replacing the current alloc region.
HeapWord* result = attempt_allocation(word_size);
if (result != NULL) {
assert_heap_not_locked();
return result;
}
MutexLockerEx x(Heap_lock);
assert_heap_locked();
// Given that humongous objects are not allocated in young
// regions, we'll first try to do the allocation without doing a
// collection hoping that there's enough space in the heap.
result = humongous_obj_allocate(word_size);
if (result != NULL) {
return result;
}
// Second attempt: Go to the slower path where we might try to
// schedule a collection.
result = attempt_allocation_slow(word_size);
if (result != NULL) {
assert_heap_not_locked();
return result;
}
if (GC_locker::is_active_and_needs_gc()) {
should_try_gc = false;
} else {
// attempt_allocation_humongous() requires the Heap_lock to be held.
Heap_lock->lock();
HeapWord* result = attempt_allocation_humongous(word_size,
false /* at_safepoint */);
if (result != NULL) {
assert_heap_not_locked();
return result;
}
// Read the GC count while still holding the Heap_lock.
gc_count_before = SharedHeap::heap()->total_collections();
should_try_gc = true;
}
assert_heap_locked();
// Read the gc count while the heap lock is held.
gc_count_before = SharedHeap::heap()->total_collections();
// Release the Heap_lock before attempting the collection.
Heap_lock->unlock();
}
// Create the garbage collection operation...
VM_G1CollectForAllocation op(gc_count_before, word_size);
// ...and get the VM thread to execute it.
VMThread::execute(&op);
if (should_try_gc) {
// If we failed to allocate the humongous object, we should try to
// do a collection pause (if we're allowed) in case it reclaims
// enough space for the allocation to succeed after the pause.
assert_heap_not_locked();
if (op.prologue_succeeded() && op.pause_succeeded()) {
// If the operation was successful we'll return the result even
// if it is NULL. If the allocation attempt failed immediately
// after a Full GC, it's unlikely we'll be able to allocate now.
HeapWord* result = op.result();
if (result != NULL && !isHumongous(word_size)) {
// Allocations that take place on VM operations do not do any
// card dirtying and we have to do it here. We only have to do
// this for non-humongous allocations, though.
dirty_young_block(result, word_size);
bool succeeded;
result = do_collection_pause(word_size, gc_count_before, &succeeded);
if (result != NULL) {
assert(succeeded, "only way to get back a non-NULL result");
return result;
}
if (succeeded) {
// If we get here we successfully scheduled a collection which
// failed to allocate. No point in trying to allocate
// further. We'll just return NULL.
MutexLockerEx x(Heap_lock);
*gc_count_before_ret = SharedHeap::heap()->total_collections();
return NULL;
}
return result;
} else {
assert(op.result() == NULL,
"the result should be NULL if the VM op did not succeed");
GC_locker::stall_until_clear();
}
// Give a warning if we seem to be looping forever.
// We can reach here if we were unsuccessful in scheduling a
// collection (because another thread beat us to it) or if we were
// stalled due to the GC locker. In either case we should retry the
// allocation attempt in case another thread successfully
// performed a collection and reclaimed enough space. Give a
// warning if we seem to be looping forever.
if ((QueuedAllocationWarningCount > 0) &&
(try_count % QueuedAllocationWarningCount == 0)) {
warning("G1CollectedHeap::mem_allocate retries %d times", try_count);
warning("G1CollectedHeap::attempt_allocation_humongous() "
"retries %d times", try_count);
}
}
ShouldNotReachHere();
return NULL;
}
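For reference, the humongous test used throughout these paths amounts to "at least half a region". A toy calculation, with an assumed 1M region size and 8-byte words (not the actual runtime values):

#include <cstddef>
#include <cstdio>

static const size_t kRegionBytes = 1024 * 1024;           // assume 1M regions
static const size_t kWordBytes   = sizeof(void*);
static const size_t kHumongousThresholdWords = (kRegionBytes / kWordBytes) / 2;

static bool is_humongous(size_t word_size) {
  // An allocation of at least half a region bypasses the young alloc region.
  return word_size >= kHumongousThresholdWords;
}

int main() {
  std::printf("threshold = %zu words\n", kHumongousThresholdWords);
  std::printf("32K words humongous? %d\n", (int) is_humongous(32 * 1024));
  std::printf("70K words humongous? %d\n", (int) is_humongous(70 * 1024));
  return 0;
}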
void G1CollectedHeap::abandon_cur_alloc_region() {
HeapWord* G1CollectedHeap::attempt_allocation_at_safepoint(size_t word_size,
bool expect_null_mutator_alloc_region) {
assert_at_safepoint(true /* should_be_vm_thread */);
assert(_mutator_alloc_region.get() == NULL ||
!expect_null_mutator_alloc_region,
"the current alloc region was unexpectedly found to be non-NULL");
HeapRegion* cur_alloc_region = _cur_alloc_region;
if (cur_alloc_region != NULL) {
assert(!cur_alloc_region->is_empty(),
"the current alloc region can never be empty");
assert(cur_alloc_region->is_young(),
"the current alloc region should be young");
retire_cur_alloc_region_common(cur_alloc_region);
if (!isHumongous(word_size)) {
return _mutator_alloc_region.attempt_allocation_locked(word_size,
false /* bot_updates */);
} else {
return humongous_obj_allocate(word_size);
}
assert(_cur_alloc_region == NULL, "post-condition");
ShouldNotReachHere();
}
void G1CollectedHeap::abandon_gc_alloc_regions() {
......@@ -1417,8 +1177,8 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,
if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) {
HandleMark hm; // Discard invalid handles created during verification
gclog_or_tty->print(" VerifyBeforeGC:");
prepare_for_verify();
Universe::verify(true);
}
......@@ -1439,9 +1199,8 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,
concurrent_mark()->abort();
// Make sure we'll choose a new allocation region afterwards.
abandon_cur_alloc_region();
release_mutator_alloc_region();
abandon_gc_alloc_regions();
assert(_cur_alloc_region == NULL, "Invariant.");
g1_rem_set()->cleanupHRRS();
tear_down_region_lists();
......@@ -1547,6 +1306,8 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,
// evacuation pause.
clear_cset_fast_test();
init_mutator_alloc_region();
double end = os::elapsedTime();
g1_policy()->record_full_collection_end();
......@@ -1720,8 +1481,9 @@ G1CollectedHeap::satisfy_failed_allocation(size_t word_size,
*succeeded = true;
// Let's attempt the allocation first.
HeapWord* result = attempt_allocation_at_safepoint(word_size,
false /* expect_null_cur_alloc_region */);
HeapWord* result =
attempt_allocation_at_safepoint(word_size,
false /* expect_null_mutator_alloc_region */);
if (result != NULL) {
assert(*succeeded, "sanity");
return result;
......@@ -1748,7 +1510,7 @@ G1CollectedHeap::satisfy_failed_allocation(size_t word_size,
// Retry the allocation
result = attempt_allocation_at_safepoint(word_size,
true /* expect_null_cur_alloc_region */);
true /* expect_null_mutator_alloc_region */);
if (result != NULL) {
assert(*succeeded, "sanity");
return result;
......@@ -1765,7 +1527,7 @@ G1CollectedHeap::satisfy_failed_allocation(size_t word_size,
// Retry the allocation once more
result = attempt_allocation_at_safepoint(word_size,
true /* expect_null_cur_alloc_region */);
true /* expect_null_mutator_alloc_region */);
if (result != NULL) {
assert(*succeeded, "sanity");
return result;
......@@ -1796,7 +1558,7 @@ HeapWord* G1CollectedHeap::expand_and_allocate(size_t word_size) {
if (expand(expand_bytes)) {
verify_region_sets_optional();
return attempt_allocation_at_safepoint(word_size,
false /* expect_null_cur_alloc_region */);
false /* expect_null_mutator_alloc_region */);
}
return NULL;
}
......@@ -1940,7 +1702,6 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
_evac_failure_scan_stack(NULL) ,
_mark_in_progress(false),
_cg1r(NULL), _summary_bytes_used(0),
_cur_alloc_region(NULL),
_refine_cte_cl(NULL),
_full_collection(false),
_free_list("Master Free List"),
......@@ -2099,7 +1860,6 @@ jint G1CollectedHeap::initialize() {
_g1_max_committed = _g1_committed;
_hrs = new HeapRegionSeq(_expansion_regions);
guarantee(_hrs != NULL, "Couldn't allocate HeapRegionSeq");
guarantee(_cur_alloc_region == NULL, "from constructor");
// 6843694 - ensure that the maximum region index can fit
// in the remembered set structures.
......@@ -2195,6 +1955,22 @@ jint G1CollectedHeap::initialize() {
// Do later initialization work for concurrent refinement.
_cg1r->init();
// Here we allocate the dummy full region that is required by the
// G1AllocRegion class. If we don't pass an address in the reserved
// space here, lots of asserts fire.
MemRegion mr(_g1_reserved.start(), HeapRegion::GrainWords);
HeapRegion* dummy_region = new HeapRegion(_bot_shared, mr, true);
// We'll re-use the same region whether the alloc region will
// require BOT updates or not and, if it doesn't, then a non-young
// region will complain that it cannot support allocations without
// BOT updates. So we'll tag the dummy region as young to avoid that.
dummy_region->set_young();
// Make sure it's full.
dummy_region->set_top(dummy_region->end());
G1AllocRegion::setup(this, dummy_region);
init_mutator_alloc_region();
return JNI_OK;
}
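The dummy-region setup above can be sketched in isolation: the allocator never stores a NULL region pointer, so the fast path needs no NULL check; when no real region is installed it points at a shared sentinel that is permanently full. A standalone model (Region and AllocRegion here are toy types, not the G1AllocRegion code):

#include <cstddef>

struct Region {
  char* bottom;
  char* top;
  char* end;
  void* allocate(size_t bytes) {
    if ((size_t)(end - top) < bytes) return NULL;  // the sentinel always fails here
    void* result = top;
    top += bytes;
    return result;
  }
};

static char   dummy_storage[1];
// top == end: the dummy region is permanently "full".
static Region dummy_region = { dummy_storage, dummy_storage, dummy_storage };

struct AllocRegion {
  Region* _cur;
  AllocRegion() : _cur(&dummy_region) {}                // never NULL
  void* attempt_allocation(size_t bytes) { return _cur->allocate(bytes); }
  void  install(Region* r) { _cur = r; }
  void  release() { _cur = &dummy_region; }
};

int main() {
  AllocRegion ar;
  if (ar.attempt_allocation(16) != NULL) return 1;      // dummy region: must fail
  static char storage[4096];
  Region real = { storage, storage, storage + sizeof(storage) };
  ar.install(&real);
  return ar.attempt_allocation(16) != NULL ? 0 : 1;     // real region: succeeds
}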
......@@ -2261,7 +2037,7 @@ size_t G1CollectedHeap::used() const {
"Should be owned on this thread's behalf.");
size_t result = _summary_bytes_used;
// Read only once in case it is set to NULL concurrently
HeapRegion* hr = _cur_alloc_region;
HeapRegion* hr = _mutator_alloc_region.get();
if (hr != NULL)
result += hr->used();
return result;
......@@ -2324,13 +2100,11 @@ size_t G1CollectedHeap::unsafe_max_alloc() {
// to free(), resulting in a SIGSEGV. Note that this doesn't appear
// to be a problem in the optimized build, since the two loads of the
// current allocation region field are optimized away.
HeapRegion* car = _cur_alloc_region;
// FIXME: should iterate over all regions?
if (car == NULL) {
HeapRegion* hr = _mutator_alloc_region.get();
if (hr == NULL) {
return 0;
}
return car->free();
return hr->free();
}
bool G1CollectedHeap::should_do_concurrent_full_gc(GCCause::Cause cause) {
......@@ -2781,16 +2555,12 @@ size_t G1CollectedHeap::unsafe_max_tlab_alloc(Thread* ignored) const {
// since we can't allow tlabs to grow big enough to accommodate
// humongous objects.
// We need to store the cur alloc region locally, since it might change
// between when we test for NULL and when we use it later.
ContiguousSpace* cur_alloc_space = _cur_alloc_region;
HeapRegion* hr = _mutator_alloc_region.get();
size_t max_tlab_size = _humongous_object_threshold_in_words * wordSize;
if (cur_alloc_space == NULL) {
if (hr == NULL) {
return max_tlab_size;
} else {
return MIN2(MAX2(cur_alloc_space->free(), (size_t)MinTLABSize),
max_tlab_size);
return MIN2(MAX2(hr->free(), (size_t) MinTLABSize), max_tlab_size);
}
}
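The clamp above (raise the region's free space to at least MinTLABSize, cap it at the humongous threshold) in standalone form, with made-up constants in place of the real flags:

#include <algorithm>
#include <cstddef>
#include <cstdio>

static const size_t kMinTLABWords            = 64;
static const size_t kHumongousThresholdWords = 64 * 1024;

size_t unsafe_max_tlab_alloc_words(const size_t* region_free_words) {
  size_t max_tlab = kHumongousThresholdWords;
  if (region_free_words == NULL) {
    return max_tlab;                      // no current region: offer the cap
  }
  return std::min(std::max(*region_free_words, kMinTLABWords), max_tlab);
}

int main() {
  size_t tiny = 10, huge = 1000000;
  std::printf("%zu %zu %zu\n",
              unsafe_max_tlab_alloc_words(NULL),
              unsafe_max_tlab_alloc_words(&tiny),    // raised to the minimum
              unsafe_max_tlab_alloc_words(&huge));   // capped at the threshold
  return 0;
}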
......@@ -3364,6 +3134,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
}
verify_region_sets_optional();
verify_dirty_young_regions();
{
// This call will decide whether this pause is an initial-mark
......@@ -3425,8 +3196,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) {
HandleMark hm; // Discard invalid handles created during verification
gclog_or_tty->print(" VerifyBeforeGC:");
prepare_for_verify();
Universe::verify(false);
}
......@@ -3442,7 +3213,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
// Forget the current alloc region (we might even choose it to be part
// of the collection set!).
abandon_cur_alloc_region();
release_mutator_alloc_region();
// The elapsed time induced by the start time below deliberately elides
// the possible verification above.
......@@ -3573,6 +3344,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty);
#endif // YOUNG_LIST_VERBOSE
init_mutator_alloc_region();
double end_time_sec = os::elapsedTime();
double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
g1_policy()->record_pause_time_ms(pause_time_ms);
......@@ -3655,6 +3428,15 @@ size_t G1CollectedHeap::desired_plab_sz(GCAllocPurpose purpose)
return gclab_word_size;
}
void G1CollectedHeap::init_mutator_alloc_region() {
assert(_mutator_alloc_region.get() == NULL, "pre-condition");
_mutator_alloc_region.init();
}
void G1CollectedHeap::release_mutator_alloc_region() {
_mutator_alloc_region.release();
assert(_mutator_alloc_region.get() == NULL, "post-condition");
}
void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) {
assert(purpose >= 0 && purpose < GCAllocPurposeCount, "invalid purpose");
......@@ -3879,7 +3661,7 @@ void G1CollectedHeap::release_gc_alloc_regions(bool totally) {
if (r->is_empty()) {
// We didn't actually allocate anything in it; let's just put
// it back on the free list.
_free_list.add_as_tail(r);
_free_list.add_as_head(r);
} else if (_retain_gc_alloc_region[ap] && !totally) {
// retain it so that we can use it at the beginning of the next GC
_retained_gc_alloc_regions[ap] = r;
......@@ -5013,7 +4795,7 @@ void G1CollectedHeap::free_region(HeapRegion* hr,
*pre_used += hr->used();
hr->hr_clear(par, true /* clear_space */);
free_list->add_as_tail(hr);
free_list->add_as_head(hr);
}
void G1CollectedHeap::free_humongous_region(HeapRegion* hr,
......@@ -5065,7 +4847,7 @@ void G1CollectedHeap::update_sets_after_freeing_regions(size_t pre_used,
}
if (free_list != NULL && !free_list->is_empty()) {
MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag);
_free_list.add_as_tail(free_list);
_free_list.add_as_head(free_list);
}
if (humongous_proxy_set != NULL && !humongous_proxy_set->is_empty()) {
MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag);
......@@ -5140,10 +4922,8 @@ class G1VerifyCardTableCleanup: public HeapRegionClosure {
CardTableModRefBS* _ct_bs;
public:
G1VerifyCardTableCleanup(CardTableModRefBS* ct_bs)
: _ct_bs(ct_bs)
{ }
virtual bool doHeapRegion(HeapRegion* r)
{
: _ct_bs(ct_bs) { }
virtual bool doHeapRegion(HeapRegion* r) {
MemRegion mr(r->bottom(), r->end());
if (r->is_survivor()) {
_ct_bs->verify_dirty_region(mr);
......@@ -5153,6 +4933,29 @@ public:
return false;
}
};
void G1CollectedHeap::verify_dirty_young_list(HeapRegion* head) {
CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set());
for (HeapRegion* hr = head; hr != NULL; hr = hr->get_next_young_region()) {
// We cannot guarantee that [bottom(),end()] is dirty. Threads
// dirty allocated blocks as they allocate them. The thread that
// retires each region and replaces it with a new one will do a
// maximal allocation to fill in [pre_dummy_top(),end()] but will
// not dirty that area (one less thing to have to do while holding
// a lock). So we can only verify that [bottom(),pre_dummy_top()]
// is dirty. Also note that verify_dirty_region() requires
// mr.start() and mr.end() to be card aligned and pre_dummy_top()
// is not guaranteed to be.
MemRegion mr(hr->bottom(),
ct_bs->align_to_card_boundary(hr->pre_dummy_top()));
ct_bs->verify_dirty_region(mr);
}
}
void G1CollectedHeap::verify_dirty_young_regions() {
verify_dirty_young_list(_young_list->first_region());
verify_dirty_young_list(_young_list->first_survivor_region());
}
#endif
void G1CollectedHeap::cleanUpCardTable() {
......@@ -5500,6 +5303,44 @@ bool G1CollectedHeap::is_in_closed_subset(const void* p) const {
}
}
HeapRegion* G1CollectedHeap::new_mutator_alloc_region(size_t word_size,
bool force) {
assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
assert(!force || g1_policy()->can_expand_young_list(),
"if force is true we should be able to expand the young list");
if (force || !g1_policy()->is_young_list_full()) {
HeapRegion* new_alloc_region = new_region(word_size,
false /* do_expand */);
if (new_alloc_region != NULL) {
g1_policy()->update_region_num(true /* next_is_young */);
set_region_short_lived_locked(new_alloc_region);
return new_alloc_region;
}
}
return NULL;
}
void G1CollectedHeap::retire_mutator_alloc_region(HeapRegion* alloc_region,
size_t allocated_bytes) {
assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
assert(alloc_region->is_young(), "all mutator alloc regions should be young");
g1_policy()->add_region_to_incremental_cset_lhs(alloc_region);
_summary_bytes_used += allocated_bytes;
}
HeapRegion* MutatorAllocRegion::allocate_new_region(size_t word_size,
bool force) {
return _g1h->new_mutator_alloc_region(word_size, force);
}
void MutatorAllocRegion::retire_region(HeapRegion* alloc_region,
size_t allocated_bytes) {
_g1h->retire_mutator_alloc_region(alloc_region, allocated_bytes);
}
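The allocate_new_region()/retire_region() pair above is the "callback" half of the G1AllocRegion design: the wrapper decides when a region must be replaced, the subclass decides how. A self-contained sketch of that shape, with a fake heap behind the callbacks (the last region is deliberately leaked to keep the model short):

#include <cstddef>
#include <cstdio>

class AllocRegionBase {
 protected:
  char*  _region;
  size_t _used;
  size_t _size;
  virtual char* allocate_new_region(size_t word_size) = 0;    // callback 1
  virtual void  retire_region(char* region, size_t used) = 0; // callback 2
 public:
  AllocRegionBase() : _region(NULL), _used(0), _size(0) {}
  virtual ~AllocRegionBase() {}
  void* attempt_allocation(size_t bytes) {
    if (_region == NULL || _used + bytes > _size) {
      if (_region != NULL) retire_region(_region, _used);
      _region = allocate_new_region(bytes);
      _used = 0;
      _size = 4096;
      if (_region == NULL) return NULL;
    }
    void* p = _region + _used;
    _used += bytes;
    return p;
  }
};

class MutatorAllocRegionModel : public AllocRegionBase {
 protected:
  virtual char* allocate_new_region(size_t) { return new char[4096]; }
  virtual void  retire_region(char* region, size_t used) {
    std::printf("retiring region with %zu bytes used\n", used);
    delete[] region;
  }
};

int main() {
  MutatorAllocRegionModel m;
  return m.attempt_allocation(128) != NULL ? 0 : 1;
}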
// Heap region set verification
class VerifyRegionListsClosure : public HeapRegionClosure {
private:
HumongousRegionSet* _humongous_set;
......
......@@ -26,6 +26,7 @@
#define SHARE_VM_GC_IMPLEMENTATION_G1_G1COLLECTEDHEAP_HPP
#include "gc_implementation/g1/concurrentMark.hpp"
#include "gc_implementation/g1/g1AllocRegion.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionSets.hpp"
#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
......@@ -128,6 +129,15 @@ public:
void print();
};
class MutatorAllocRegion : public G1AllocRegion {
protected:
virtual HeapRegion* allocate_new_region(size_t word_size, bool force);
virtual void retire_region(HeapRegion* alloc_region, size_t allocated_bytes);
public:
MutatorAllocRegion()
: G1AllocRegion("Mutator Alloc Region", false /* bot_updates */) { }
};
class RefineCardTableEntryClosure;
class G1CollectedHeap : public SharedHeap {
friend class VM_G1CollectForAllocation;
......@@ -135,6 +145,7 @@ class G1CollectedHeap : public SharedHeap {
friend class VM_G1CollectFull;
friend class VM_G1IncCollectionPause;
friend class VMStructs;
friend class MutatorAllocRegion;
// Closures used in implementation.
friend class G1ParCopyHelper;
......@@ -197,12 +208,15 @@ private:
// The sequence of all heap regions in the heap.
HeapRegionSeq* _hrs;
// The region from which normal-sized objects are currently being
// allocated. May be NULL.
HeapRegion* _cur_alloc_region;
// Alloc region used to satisfy mutator allocation requests.
MutatorAllocRegion _mutator_alloc_region;
// It resets the mutator alloc region before new allocations can take place.
void init_mutator_alloc_region();
// It releases the mutator alloc region.
void release_mutator_alloc_region();
// Postcondition: cur_alloc_region == NULL.
void abandon_cur_alloc_region();
void abandon_gc_alloc_regions();
// The to-space memory regions into which objects are being copied during
......@@ -360,27 +374,21 @@ protected:
G1CollectorPolicy* _g1_policy;
// This is the second level of trying to allocate a new region. If
// new_region_work didn't find a region in the free_list, this call
// will check whether there's anything available in the
// secondary_free_list and/or wait for more regions to appear in that
// list, if _free_regions_coming is set.
// new_region() didn't find a region on the free_list, this call will
// check whether there's anything available on the
// secondary_free_list and/or wait for more regions to appear on
// that list, if _free_regions_coming is set.
HeapRegion* new_region_try_secondary_free_list();
// Try to allocate a single non-humongous HeapRegion sufficient for
// an allocation of the given word_size. If do_expand is true,
// attempt to expand the heap if necessary to satisfy the allocation
// request.
HeapRegion* new_region_work(size_t word_size, bool do_expand);
HeapRegion* new_region(size_t word_size, bool do_expand);
// Try to allocate a new region to be used for allocation by a
// mutator thread. Attempt to expand the heap if no region is
// Try to allocate a new region to be used for allocation by
// a GC thread. It will try to expand the heap if no region is
// available.
HeapRegion* new_alloc_region(size_t word_size) {
return new_region_work(word_size, false /* do_expand */);
}
// Try to allocate a new region to be used for allocation by a GC
// thread. Attempt to expand the heap if no region is available.
HeapRegion* new_gc_alloc_region(int purpose, size_t word_size);
// Attempt to satisfy a humongous allocation request of the given
......@@ -415,10 +423,6 @@ protected:
// * All non-TLAB allocation requests should go to mem_allocate()
// and mem_allocate() should never be called with is_tlab == true.
//
// * If the GC locker is active we currently stall until we can
// allocate a new young region. This will be changed in the
// near future (see CR 6994056).
//
// * If either call cannot satisfy the allocation request using the
// current allocating region, they will try to get a new one. If
// this fails, they will attempt to do an evacuation pause and
......@@ -441,122 +445,38 @@ protected:
bool is_tlab, /* expected to be false */
bool* gc_overhead_limit_was_exceeded);
// The following methods, allocate_from_cur_allocation_region(),
// attempt_allocation(), attempt_allocation_locked(),
// replace_cur_alloc_region_and_allocate(),
// attempt_allocation_slow(), and attempt_allocation_humongous()
// have very awkward pre- and post-conditions with respect to
// locking:
//
// If they are called outside a safepoint they assume the caller
// holds the Heap_lock when it calls them. However, on exit they
// will release the Heap_lock if they return a non-NULL result, but
// keep holding the Heap_lock if they return a NULL result. The
// reason for this is that we need to dirty the cards that span
// allocated blocks on young regions to avoid having to take the
// slow path of the write barrier (for performance reasons we don't
// update RSets for references whose source is a young region, so we
// don't need to look at dirty cards on young regions). But, doing
// this card dirtying while holding the Heap_lock can be a
// scalability bottleneck, especially given that some allocation
// requests might be of non-trivial size (and the larger the region
// size is, the fewer allocations requests will be considered
// humongous, as the humongous size limit is a fraction of the
// region size). So, when one of these calls succeeds in allocating
// a block it does the card dirtying after it releases the Heap_lock
// which is why it will return without holding it.
//
// The above asymmetry is the reason why locking / unlocking is done
// explicitly (i.e., with Heap_lock->lock() and
// Heap_lock->unlock()) instead of using MutexLocker and
// MutexUnlocker objects. The latter would ensure that the lock is
// unlocked / re-locked at every possible exit out of the basic
// block. However, we only want that action to happen in selected
// places.
//
// Further, if the above methods are called during a safepoint, then
// naturally there's no assumption about the Heap_lock being held or
// there's no attempt to unlock it. The parameter at_safepoint
// indicates whether the call is made during a safepoint or not (as
// an optimization, to avoid reading the global flag with
// SafepointSynchronize::is_at_safepoint()).
//
// The methods share these parameters:
//
// * word_size : the size of the allocation request in words
// * at_safepoint : whether the call is done at a safepoint; this
// also determines whether a GC is permitted
// (at_safepoint == false) or not (at_safepoint == true)
// * do_dirtying : whether the method should dirty the allocated
// block before returning
//
// They all return either the address of the block, if they
// successfully manage to allocate it, or NULL.
// It tries to satisfy an allocation request out of the current
// alloc region, which is passed as a parameter. It assumes that the
// caller has checked that the current alloc region is not NULL.
// Given that the caller has to check the current alloc region for
// at least NULL, it might as well pass it as the first parameter so
// that the method doesn't have to read it from the
// _cur_alloc_region field again. It is called from both
// attempt_allocation() and attempt_allocation_locked() and the
// with_heap_lock parameter indicates whether the caller was holding
// the heap lock when it called it or not.
inline HeapWord* allocate_from_cur_alloc_region(HeapRegion* cur_alloc_region,
size_t word_size,
bool with_heap_lock);
// First-level of allocation slow path: it attempts to allocate out
// of the current alloc region in a lock-free manner using a CAS. If
// that fails it takes the Heap_lock and calls
// attempt_allocation_locked() for the second-level slow path.
inline HeapWord* attempt_allocation(size_t word_size);
// Second-level of allocation slow path: while holding the Heap_lock
// it tries to allocate out of the current alloc region and, if that
// fails, tries to allocate out of a new current alloc region.
inline HeapWord* attempt_allocation_locked(size_t word_size);
// It assumes that the current alloc region has been retired and
// tries to allocate a new one. If it's successful, it performs the
// allocation out of the new current alloc region and updates
// _cur_alloc_region. Normally, it would try to allocate a new
// region if the young gen is not full, unless can_expand is true in
// which case it would always try to allocate a new region.
HeapWord* replace_cur_alloc_region_and_allocate(size_t word_size,
bool at_safepoint,
bool do_dirtying,
bool can_expand);
// Third-level of allocation slow path: when we are unable to
// allocate a new current alloc region to satisfy an allocation
// request (i.e., when attempt_allocation_locked() fails). It will
// try to do an evacuation pause, which might stall due to the GC
// locker, and retry the allocation attempt when appropriate.
HeapWord* attempt_allocation_slow(size_t word_size);
// The method that tries to satisfy a humongous allocation
// request. If it cannot satisfy it it will try to do an evacuation
// pause to perhaps reclaim enough space to be able to satisfy the
// allocation request afterwards.
// The following three methods take a gc_count_before_ret
// parameter which is used to return the GC count if the method
// returns NULL. Given that we are required to read the GC count
// while holding the Heap_lock, and these paths will take the
// Heap_lock at some point, it's easier to get them to read the GC
// count while holding the Heap_lock before they return NULL instead
// of the caller (namely: mem_allocate()) having to also take the
// Heap_lock just to read the GC count.
// First-level mutator allocation attempt: try to allocate out of
// the mutator alloc region without taking the Heap_lock. This
// should only be used for non-humongous allocations.
inline HeapWord* attempt_allocation(size_t word_size,
unsigned int* gc_count_before_ret);
// Second-level mutator allocation attempt: take the Heap_lock and
// retry the allocation attempt, potentially scheduling a GC
// pause. This should only be used for non-humongous allocations.
HeapWord* attempt_allocation_slow(size_t word_size,
unsigned int* gc_count_before_ret);
// Takes the Heap_lock and attempts a humongous allocation. It can
// potentially schedule a GC pause.
HeapWord* attempt_allocation_humongous(size_t word_size,
bool at_safepoint);
// It does the common work when we are retiring the current alloc region.
inline void retire_cur_alloc_region_common(HeapRegion* cur_alloc_region);
// It retires the current alloc region, which is passed as a
// parameter (since, typically, the caller is already holding on to
// it). It sets _cur_alloc_region to NULL.
void retire_cur_alloc_region(HeapRegion* cur_alloc_region);
unsigned int* gc_count_before_ret);
// It attempts to do an allocation immediately before or after an
// evacuation pause and can only be called by the VM thread. It has
// slightly different assumptions than the ones before (i.e.,
// assumes that the current alloc region has been retired).
// Allocation attempt that should be called during safepoints (e.g.,
// at the end of a successful GC). expect_null_mutator_alloc_region
// specifies whether the mutator alloc region is expected to be NULL
// or not.
HeapWord* attempt_allocation_at_safepoint(size_t word_size,
bool expect_null_cur_alloc_region);
bool expect_null_mutator_alloc_region);
// It dirties the cards that cover the block so that the post
// write barrier never queues anything when updating objects on this
......@@ -583,6 +503,12 @@ protected:
// GC pause.
void retire_alloc_region(HeapRegion* alloc_region, bool par);
// These two methods are the "callbacks" from the G1AllocRegion class.
HeapRegion* new_mutator_alloc_region(size_t word_size, bool force);
void retire_mutator_alloc_region(HeapRegion* alloc_region,
size_t allocated_bytes);
// - if explicit_gc is true, the GC is for a System.gc() or a heap
// inspection request and should collect the entire heap
// - if clear_all_soft_refs is true, all soft references should be
......@@ -1027,6 +953,9 @@ public:
// The number of regions available for "regular" expansion.
size_t expansion_regions() { return _expansion_regions; }
void verify_dirty_young_list(HeapRegion* head) PRODUCT_RETURN;
void verify_dirty_young_regions() PRODUCT_RETURN;
// verify_region_sets() performs verification over the region
// lists. It will be compiled in the product code to be used when
// necessary (i.e., during heap verification).
......@@ -1061,7 +990,7 @@ public:
}
void append_secondary_free_list() {
_free_list.add_as_tail(&_secondary_free_list);
_free_list.add_as_head(&_secondary_free_list);
}
void append_secondary_free_list_if_not_empty_with_lock() {
......@@ -1128,7 +1057,13 @@ public:
return _g1_reserved.contains(p);
}
// Returns a MemRegion that corresponds to the space that has been
// reserved for the heap
MemRegion g1_reserved() {
return _g1_reserved;
}
// Returns a MemRegion that corresponds to the space that has been
// committed in the heap
MemRegion g1_committed() {
return _g1_committed;
......
......@@ -27,6 +27,7 @@
#include "gc_implementation/g1/concurrentMark.hpp"
#include "gc_implementation/g1/g1CollectedHeap.hpp"
#include "gc_implementation/g1/g1AllocRegion.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "utilities/taskqueue.hpp"
......@@ -59,131 +60,23 @@ inline bool G1CollectedHeap::obj_in_cs(oop obj) {
return r != NULL && r->in_collection_set();
}
// See the comment in the .hpp file about the locking protocol and
// assumptions of this method (and other related ones).
inline HeapWord*
G1CollectedHeap::allocate_from_cur_alloc_region(HeapRegion* cur_alloc_region,
size_t word_size,
bool with_heap_lock) {
assert_not_at_safepoint();
assert(with_heap_lock == Heap_lock->owned_by_self(),
"with_heap_lock and Heap_lock->owned_by_self() should be a tautology");
assert(cur_alloc_region != NULL, "pre-condition of the method");
assert(cur_alloc_region->is_young(),
"we only support young current alloc regions");
assert(!isHumongous(word_size), "allocate_from_cur_alloc_region() "
"should not be used for humongous allocations");
assert(!cur_alloc_region->isHumongous(), "Catch a regression of this bug.");
assert(!cur_alloc_region->is_empty(),
err_msg("region ["PTR_FORMAT","PTR_FORMAT"] should not be empty",
cur_alloc_region->bottom(), cur_alloc_region->end()));
HeapWord* result = cur_alloc_region->par_allocate_no_bot_updates(word_size);
if (result != NULL) {
assert(is_in(result), "result should be in the heap");
if (with_heap_lock) {
Heap_lock->unlock();
}
assert_heap_not_locked();
// Do the dirtying after we release the Heap_lock.
dirty_young_block(result, word_size);
return result;
}
if (with_heap_lock) {
assert_heap_locked();
} else {
assert_heap_not_locked();
}
return NULL;
}
// See the comment in the .hpp file about the locking protocol and
// assumptions of this method (and other related ones).
inline HeapWord*
G1CollectedHeap::attempt_allocation(size_t word_size) {
G1CollectedHeap::attempt_allocation(size_t word_size,
unsigned int* gc_count_before_ret) {
assert_heap_not_locked_and_not_at_safepoint();
assert(!isHumongous(word_size), "attempt_allocation() should not be called "
"for humongous allocation requests");
HeapRegion* cur_alloc_region = _cur_alloc_region;
if (cur_alloc_region != NULL) {
HeapWord* result = allocate_from_cur_alloc_region(cur_alloc_region,
word_size,
false /* with_heap_lock */);
assert_heap_not_locked();
if (result != NULL) {
return result;
}
}
// Our attempt to allocate lock-free failed as the current
// allocation region is either NULL or full. So, we'll now take the
// Heap_lock and retry.
Heap_lock->lock();
HeapWord* result = attempt_allocation_locked(word_size);
if (result != NULL) {
assert_heap_not_locked();
return result;
}
assert_heap_locked();
return NULL;
}
inline void
G1CollectedHeap::retire_cur_alloc_region_common(HeapRegion* cur_alloc_region) {
assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
assert(cur_alloc_region != NULL && cur_alloc_region == _cur_alloc_region,
"pre-condition of the call");
assert(cur_alloc_region->is_young(),
"we only support young current alloc regions");
// The region is guaranteed to be young
g1_policy()->add_region_to_incremental_cset_lhs(cur_alloc_region);
_summary_bytes_used += cur_alloc_region->used();
_cur_alloc_region = NULL;
}
assert(!isHumongous(word_size), "attempt_allocation() should not "
"be called for humongous allocation requests");
inline HeapWord*
G1CollectedHeap::attempt_allocation_locked(size_t word_size) {
assert_heap_locked_and_not_at_safepoint();
assert(!isHumongous(word_size), "attempt_allocation_locked() "
"should not be called for humongous allocation requests");
// First, reread the current alloc region and retry the allocation
// in case somebody replaced it while we were waiting to get the
// Heap_lock.
HeapRegion* cur_alloc_region = _cur_alloc_region;
if (cur_alloc_region != NULL) {
HeapWord* result = allocate_from_cur_alloc_region(
cur_alloc_region, word_size,
true /* with_heap_lock */);
if (result != NULL) {
assert_heap_not_locked();
return result;
}
// We failed to allocate out of the current alloc region, so let's
// retire it before getting a new one.
retire_cur_alloc_region(cur_alloc_region);
HeapWord* result = _mutator_alloc_region.attempt_allocation(word_size,
false /* bot_updates */);
if (result == NULL) {
result = attempt_allocation_slow(word_size, gc_count_before_ret);
}
assert_heap_locked();
// Try to get a new region and allocate out of it
HeapWord* result = replace_cur_alloc_region_and_allocate(word_size,
false, /* at_safepoint */
true, /* do_dirtying */
false /* can_expand */);
assert_heap_not_locked();
if (result != NULL) {
assert_heap_not_locked();
return result;
dirty_young_block(result, word_size);
}
assert_heap_locked();
return NULL;
return result;
}
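The dirty_young_block(result, word_size) step referenced above marks every card spanned by the newly allocated block as dirty, and it is done after the Heap_lock is released. A standalone model with a simplified byte-per-card table and made-up sizes:

#include <cassert>
#include <cstddef>
#include <cstring>

static const size_t kCardBytes = 512;
static const size_t kHeapBytes = 64 * 1024;
static unsigned char card_table[kHeapBytes / kCardBytes];   // 1 byte per card

static void dirty_block(size_t start_offset, size_t byte_size) {
  size_t first_card = start_offset / kCardBytes;
  size_t last_card  = (start_offset + byte_size - 1) / kCardBytes;
  for (size_t c = first_card; c <= last_card; c++) {
    card_table[c] = 1;   // dirty
  }
}

int main() {
  std::memset(card_table, 0, sizeof(card_table));
  dirty_block(1000, 2000);           // spans cards 1..5 (offsets 1000..2999)
  assert(card_table[0] == 0);
  assert(card_table[1] == 1 && card_table[5] == 1);
  assert(card_table[6] == 0);
  return 0;
}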
// It dirties the cards that cover the block so that the post
......
......@@ -307,6 +307,7 @@ G1CollectorPolicy::G1CollectorPolicy() :
_par_last_termination_times_ms = new double[_parallel_gc_threads];
_par_last_termination_attempts = new double[_parallel_gc_threads];
_par_last_gc_worker_end_times_ms = new double[_parallel_gc_threads];
_par_last_gc_worker_times_ms = new double[_parallel_gc_threads];
// start conservatively
_expensive_region_limit_ms = 0.5 * (double) MaxGCPauseMillis;
......@@ -911,6 +912,7 @@ void G1CollectorPolicy::record_collection_pause_start(double start_time_sec,
_par_last_termination_times_ms[i] = -1234.0;
_par_last_termination_attempts[i] = -1234.0;
_par_last_gc_worker_end_times_ms[i] = -1234.0;
_par_last_gc_worker_times_ms[i] = -1234.0;
}
#endif
......@@ -1063,8 +1065,7 @@ T sum_of(T* sum_arr, int start, int n, int N) {
void G1CollectorPolicy::print_par_stats(int level,
const char* str,
double* data,
bool summary) {
double* data) {
double min = data[0], max = data[0];
double total = 0.0;
LineBuffer buf(level);
......@@ -1078,20 +1079,15 @@ void G1CollectorPolicy::print_par_stats(int level,
total += val;
buf.append(" %3.1lf", val);
}
if (summary) {
buf.append_and_print_cr("");
double avg = total / (double) ParallelGCThreads;
buf.append(" ");
buf.append("Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf",
avg, min, max);
}
buf.append_and_print_cr("]");
buf.append_and_print_cr("");
double avg = total / (double) ParallelGCThreads;
buf.append_and_print_cr(" Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf, Diff: %5.1lf]",
avg, min, max, max - min);
}
void G1CollectorPolicy::print_par_sizes(int level,
const char* str,
double* data,
bool summary) {
double* data) {
double min = data[0], max = data[0];
double total = 0.0;
LineBuffer buf(level);
......@@ -1105,14 +1101,10 @@ void G1CollectorPolicy::print_par_sizes(int level,
total += val;
buf.append(" %d", (int) val);
}
if (summary) {
buf.append_and_print_cr("");
double avg = total / (double) ParallelGCThreads;
buf.append(" ");
buf.append("Sum: %d, Avg: %d, Min: %d, Max: %d",
(int)total, (int)avg, (int)min, (int)max);
}
buf.append_and_print_cr("]");
buf.append_and_print_cr("");
double avg = total / (double) ParallelGCThreads;
buf.append_and_print_cr(" Sum: %d, Avg: %d, Min: %d, Max: %d, Diff: %d]",
(int)total, (int)avg, (int)min, (int)max, (int)max - (int)min);
}
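The reworked per-worker output can be reproduced in a few lines: print each worker's value, then Avg/Min/Max and the new Diff (Max - Min), which is what makes worker imbalance stand out. A standalone sketch, not the LineBuffer-based code:

#include <algorithm>
#include <cstdio>

void print_par_stats(const char* str, const double* data, int n) {
  double min = data[0], max = data[0], total = 0.0;
  std::printf("[%s (ms):", str);
  for (int i = 0; i < n; i++) {
    double val = data[i];
    min = std::min(min, val);
    max = std::max(max, val);
    total += val;
    std::printf(" %3.1lf", val);
  }
  double avg = total / n;
  std::printf("\n   Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf, Diff: %5.1lf]\n",
              avg, min, max, max - min);
}

int main() {
  double worker_times_ms[] = { 12.0, 11.5, 14.2, 9.8 };
  print_par_stats("GC Worker Times", worker_times_ms, 4);
  return 0;
}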
void G1CollectorPolicy::print_stats (int level,
......@@ -1421,22 +1413,22 @@ void G1CollectorPolicy::record_collection_pause_end() {
}
if (parallel) {
print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
print_par_stats(2, "GC Worker Start Time",
_par_last_gc_worker_start_times_ms, false);
print_par_stats(2, "GC Worker Start Time", _par_last_gc_worker_start_times_ms);
print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
print_par_sizes(3, "Processed Buffers",
_par_last_update_rs_processed_buffers, true);
print_par_stats(2, "Ext Root Scanning",
_par_last_ext_root_scan_times_ms);
print_par_stats(2, "Mark Stack Scanning",
_par_last_mark_stack_scan_times_ms);
print_par_sizes(3, "Processed Buffers", _par_last_update_rs_processed_buffers);
print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms);
print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
print_par_stats(2, "Termination", _par_last_termination_times_ms);
print_par_sizes(3, "Termination Attempts",
_par_last_termination_attempts, true);
print_par_stats(2, "GC Worker End Time",
_par_last_gc_worker_end_times_ms, false);
print_par_sizes(3, "Termination Attempts", _par_last_termination_attempts);
print_par_stats(2, "GC Worker End Time", _par_last_gc_worker_end_times_ms);
for (int i = 0; i < _parallel_gc_threads; i++) {
_par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
}
print_par_stats(2, "GC Worker Times", _par_last_gc_worker_times_ms);
print_stats(2, "Other", parallel_other_time);
print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
} else {
......
......@@ -182,6 +182,7 @@ protected:
double* _par_last_termination_times_ms;
double* _par_last_termination_attempts;
double* _par_last_gc_worker_end_times_ms;
double* _par_last_gc_worker_times_ms;
// indicates that we are in young GC mode
bool _in_young_gc_mode;
......@@ -569,11 +570,8 @@ protected:
void print_stats(int level, const char* str, double value);
void print_stats(int level, const char* str, int value);
void print_par_stats(int level, const char* str, double* data) {
print_par_stats(level, str, data, true);
}
void print_par_stats(int level, const char* str, double* data, bool summary);
void print_par_sizes(int level, const char* str, double* data, bool summary);
void print_par_stats(int level, const char* str, double* data);
void print_par_sizes(int level, const char* str, double* data);
void check_other_times(int level,
NumberSeq* other_times_ms,
......
......@@ -89,6 +89,11 @@
"The number of discovered reference objects to process before " \
"draining concurrent marking work queues.") \
\
experimental(bool, G1UseConcMarkReferenceProcessing, false, \
"If true, enable reference discovery during concurrent " \
"marking and reference processing at the end of remark " \
"(unsafe).") \
\
develop(bool, G1SATBBarrierPrintNullPreVals, false, \
"If true, count frac of ptr writes with null pre-vals.") \
\
......@@ -138,9 +143,9 @@
develop(bool, G1RSCountHisto, false, \
"If true, print a histogram of RS occupancies after each pause") \
\
develop(intx, G1PrintRegionLivenessInfo, 0, \
"When > 0, print the occupancies of the <n> best and worst" \
"regions.") \
product(bool, G1PrintRegionLivenessInfo, false, \
"Prints the liveness information for all regions in the heap " \
"at the end of a marking cycle.") \
\
develop(bool, G1PrintParCleanupStats, false, \
"When true, print extra stats about parallel cleanup.") \
......@@ -193,6 +198,10 @@
develop(intx, G1ConcRSHotCardLimit, 4, \
"The threshold that defines (>=) a hot card.") \
\
develop(intx, G1MaxHotCardCountSizePercent, 25, \
"The maximum size of the hot card count cache as a " \
"percentage of the number of cards for the maximum heap.") \
\
develop(bool, G1PrintOopAppls, false, \
"When true, print applications of closures to external locs.") \
\
......
......@@ -360,6 +360,7 @@ void HeapRegion::hr_clear(bool par, bool clear_space) {
set_young_index_in_cset(-1);
uninstall_surv_rate_group();
set_young_type(NotYoung);
reset_pre_dummy_top();
if (!par) {
// If this is parallel, this will be done later.
......@@ -923,11 +924,11 @@ void G1OffsetTableContigSpace::set_saved_mark() {
ContiguousSpace::set_saved_mark();
OrderAccess::storestore();
_gc_time_stamp = curr_gc_time_stamp;
// The following fence is to force a flush of the writes above, but
// is strictly not needed because when an allocating worker thread
// calls set_saved_mark() it does so under the ParGCRareEvent_lock;
// when the lock is released, the write will be flushed.
// OrderAccess::fence();
// No need to do another barrier to flush the writes above. If
// this is called in parallel with other threads trying to
// allocate into the region, the caller should call this while
// holding a lock and when the lock is released the writes will be
// flushed.
}
}
......
......@@ -149,6 +149,13 @@ class G1OffsetTableContigSpace: public ContiguousSpace {
G1BlockOffsetArrayContigSpace _offsets;
Mutex _par_alloc_lock;
volatile unsigned _gc_time_stamp;
// When we need to retire an allocation region, while other threads
// are also concurrently trying to allocate into it, we typically
// allocate a dummy object at the end of the region to ensure that
// no more allocations can take place in it. However, sometimes we
// want to know where the end of the last "real" object we allocated
// into the region was, and this is what this field keeps track of.
HeapWord* _pre_dummy_top;
public:
// Constructor. If "is_zeroed" is true, the MemRegion "mr" may be
......@@ -163,6 +170,17 @@ class G1OffsetTableContigSpace: public ContiguousSpace {
virtual void set_saved_mark();
void reset_gc_time_stamp() { _gc_time_stamp = 0; }
// See the comment above in the declaration of _pre_dummy_top for an
// explanation of what it is.
void set_pre_dummy_top(HeapWord* pre_dummy_top) {
assert(is_in(pre_dummy_top) && pre_dummy_top <= top(), "pre-condition");
_pre_dummy_top = pre_dummy_top;
}
HeapWord* pre_dummy_top() {
return (_pre_dummy_top == NULL) ? top() : _pre_dummy_top;
}
void reset_pre_dummy_top() { _pre_dummy_top = NULL; }
virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
virtual void clear(bool mangle_space);
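The _pre_dummy_top mechanism above can be modelled on its own: retiring a region fills [top, end) with a dummy block so no further allocation fits, while pre_dummy_top remembers where real data ended, so dirty-card verification only inspects the genuine prefix. A toy model using word offsets instead of pointers:

#include <cassert>
#include <cstddef>

struct RegionModel {
  size_t bottom, top, end, pre_dummy_top;   // word offsets, for simplicity

  RegionModel(size_t sz) : bottom(0), top(0), end(sz), pre_dummy_top(0) {}

  void retire() {
    pre_dummy_top = top;   // end of the last "real" object
    top = end;             // dummy fill: no further allocation can fit
  }
  // Only [bottom, pre_dummy_top) is expected to have been dirtied by mutators.
  size_t dirty_limit() const {
    return (pre_dummy_top == 0) ? top : pre_dummy_top;
  }
};

int main() {
  RegionModel r(512);
  r.top = 100;             // pretend 100 words were allocated
  r.retire();
  assert(r.top == r.end);
  assert(r.dirty_limit() == 100);
  return 0;
}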
......@@ -380,13 +398,16 @@ class HeapRegion: public G1OffsetTableContigSpace {
// The number of bytes marked live in the region in the last marking phase.
size_t marked_bytes() { return _prev_marked_bytes; }
size_t live_bytes() {
return (top() - prev_top_at_mark_start()) * HeapWordSize + marked_bytes();
}
// The number of bytes counted in the next marking.
size_t next_marked_bytes() { return _next_marked_bytes; }
// The number of bytes live wrt the next marking.
size_t next_live_bytes() {
return (top() - next_top_at_mark_start())
* HeapWordSize
+ next_marked_bytes();
return
(top() - next_top_at_mark_start()) * HeapWordSize + next_marked_bytes();
}
// A lower bound on the amount of garbage bytes in the region.
......
......@@ -38,15 +38,8 @@ inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) {
// this is used for larger LAB allocations only.
inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) {
MutexLocker x(&_par_alloc_lock);
// This ought to be just "allocate", because of the lock above, but that
// ContiguousSpace::allocate asserts that either the allocating thread
// holds the heap lock or it is the VM thread and we're at a safepoint.
// The best I (dld) could figure was to put a field in ContiguousSpace
// meaning "locking at safepoint taken care of", and set/reset that
// here. But this will do for now, especially in light of the comment
// above. Perhaps in the future some lock-free manner of keeping the
// coordination.
HeapWord* res = ContiguousSpace::par_allocate(size);
// Given that we take the lock no need to use par_allocate() here.
HeapWord* res = ContiguousSpace::allocate(size);
if (res != NULL) {
_offsets.alloc_block(res, size);
}
......
......@@ -261,6 +261,45 @@ void HeapRegionLinkedList::fill_in_ext_msg_extra(hrs_ext_msg* msg) {
msg->append(" hd: "PTR_FORMAT" tl: "PTR_FORMAT, head(), tail());
}
void HeapRegionLinkedList::add_as_head(HeapRegionLinkedList* from_list) {
hrs_assert_mt_safety_ok(this);
hrs_assert_mt_safety_ok(from_list);
verify_optional();
from_list->verify_optional();
if (from_list->is_empty()) return;
#ifdef ASSERT
HeapRegionLinkedListIterator iter(from_list);
while (iter.more_available()) {
HeapRegion* hr = iter.get_next();
// In set_containing_set() we check that we either set the value
// from NULL to non-NULL or vice versa to catch bugs. So, we have
// to NULL it first before setting it to the value.
hr->set_containing_set(NULL);
hr->set_containing_set(this);
}
#endif // ASSERT
if (_head != NULL) {
assert(length() > 0 && _tail != NULL, hrs_ext_msg(this, "invariant"));
from_list->_tail->set_next(_head);
} else {
assert(length() == 0 && _head == NULL, hrs_ext_msg(this, "invariant"));
_tail = from_list->_tail;
}
_head = from_list->_head;
_length += from_list->length();
_region_num += from_list->region_num();
_total_used_bytes += from_list->total_used_bytes();
from_list->clear();
verify_optional();
from_list->verify_optional();
}
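The splice above is an O(1) operation: the source list keeps its order, its tail is wired to the old head, and the source is emptied. A self-contained sketch with toy nodes (not HeapRegion):

#include <cassert>
#include <cstddef>

struct Node { Node* next; };

struct List {
  Node*  head;
  Node*  tail;
  size_t length;
  List() : head(NULL), tail(NULL), length(0) {}

  void add_as_head(List* from) {
    if (from->head == NULL) return;          // nothing to splice
    if (head != NULL) {
      from->tail->next = head;               // link their tail to our old head
    } else {
      tail = from->tail;                     // we were empty: adopt their tail
    }
    head = from->head;
    length += from->length;
    from->head = from->tail = NULL;          // empty the source list
    from->length = 0;
  }
};

int main() {
  Node a = { NULL }, b = { &a };              // list "from": b -> a
  List from;  from.head = &b; from.tail = &a; from.length = 2;
  Node c = { NULL };
  List to;    to.head = to.tail = &c; to.length = 1;
  to.add_as_head(&from);                      // result: b -> a -> c
  assert(to.head == &b && to.tail == &c && to.length == 3);
  assert(a.next == &c);
  return 0;
}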
void HeapRegionLinkedList::add_as_tail(HeapRegionLinkedList* from_list) {
hrs_assert_mt_safety_ok(this);
hrs_assert_mt_safety_ok(from_list);
......
......@@ -277,6 +277,10 @@ protected:
}
public:
// It adds hr to the list as the new head. The region should not be
// a member of another set.
inline void add_as_head(HeapRegion* hr);
// It adds hr to the list as the new tail. The region should not be
// a member of another set.
inline void add_as_tail(HeapRegion* hr);
......@@ -288,6 +292,11 @@ public:
// Convenience method.
inline HeapRegion* remove_head_or_null();
// It moves the regions from from_list to this list and empties
// from_list. The new regions will appear in the same order as they
// were in from_list and be linked in the beginning of this list.
void add_as_head(HeapRegionLinkedList* from_list);
// It moves the regions from from_list to this list and empties
// from_list. The new regions will appear in the same order as they
// were in from_list and be linked in the end of this list.
......
......@@ -110,6 +110,23 @@ inline void HeapRegionSet::remove_with_proxy(HeapRegion* hr,
//////////////////// HeapRegionLinkedList ////////////////////
inline void HeapRegionLinkedList::add_as_head(HeapRegion* hr) {
hrs_assert_mt_safety_ok(this);
assert((length() == 0 && _head == NULL && _tail == NULL) ||
(length() > 0 && _head != NULL && _tail != NULL),
hrs_ext_msg(this, "invariant"));
// add_internal() will verify the region.
add_internal(hr);
// Now link the region.
if (_head != NULL) {
hr->set_next(_head);
} else {
_tail = hr;
}
_head = hr;
}
inline void HeapRegionLinkedList::add_as_tail(HeapRegion* hr) {
hrs_assert_mt_safety_ok(this);
assert((length() == 0 && _head == NULL && _tail == NULL) ||
......
......@@ -382,6 +382,11 @@ public:
return (addr_for(pcard) == p);
}
HeapWord* align_to_card_boundary(HeapWord* p) {
jbyte* pcard = byte_for(p + card_size_in_words - 1);
return addr_for(pcard);
}
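align_to_card_boundary() rounds an address up to the next card boundary. The same arithmetic in standalone form, assuming the usual 512-byte card size (treat the constant as an assumption of the sketch):

#include <cassert>
#include <cstdint>

static const uintptr_t kCardBytes = 512;

static uintptr_t align_up_to_card(uintptr_t addr) {
  // Add (card size - 1) and mask: already-aligned addresses are unchanged.
  return (addr + kCardBytes - 1) & ~(kCardBytes - 1);
}

int main() {
  assert(align_up_to_card(0)    == 0);
  assert(align_up_to_card(1)    == 512);
  assert(align_up_to_card(512)  == 512);
  assert(align_up_to_card(513)  == 1024);
  return 0;
}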
// The kinds of precision a CardTableModRefBS may offer.
enum PrecisionStyle {
Precise,
......
......@@ -318,17 +318,28 @@ private:
protected:
template <class T> void do_oop_work(T* p) {
HeapWord* jp = (HeapWord*)p;
if (jp >= _begin && jp < _end) {
oop obj = oopDesc::load_decode_heap_oop(p);
guarantee(obj == NULL ||
(HeapWord*)p < _boundary ||
(HeapWord*)obj >= _boundary,
"pointer on clean card crosses boundary");
}
assert(jp >= _begin && jp < _end,
err_msg("Error: jp " PTR_FORMAT " should be within "
"[_begin, _end) = [" PTR_FORMAT "," PTR_FORMAT ")",
jp, _begin, _end));
oop obj = oopDesc::load_decode_heap_oop(p);
guarantee(obj == NULL || (HeapWord*)obj >= _boundary,
err_msg("pointer " PTR_FORMAT " at " PTR_FORMAT " on "
"clean card crosses boundary" PTR_FORMAT,
(HeapWord*)obj, jp, _boundary));
}
public:
VerifyCleanCardClosure(HeapWord* b, HeapWord* begin, HeapWord* end) :
_boundary(b), _begin(begin), _end(end) {}
_boundary(b), _begin(begin), _end(end) {
assert(b <= begin,
err_msg("Error: boundary " PTR_FORMAT " should be at or below begin " PTR_FORMAT,
b, begin));
assert(begin <= end,
err_msg("Error: begin " PTR_FORMAT " should be strictly below end " PTR_FORMAT,
begin, end));
}
virtual void do_oop(oop* p) { VerifyCleanCardClosure::do_oop_work(p); }
virtual void do_oop(narrowOop* p) { VerifyCleanCardClosure::do_oop_work(p); }
};
......@@ -392,13 +403,14 @@ void CardTableRS::verify_space(Space* s, HeapWord* gen_boundary) {
}
}
// Now traverse objects until end.
HeapWord* cur = start_block;
VerifyCleanCardClosure verify_blk(gen_boundary, begin, end);
while (cur < end) {
if (s->block_is_obj(cur) && s->obj_is_alive(cur)) {
oop(cur)->oop_iterate(&verify_blk);
if (begin < end) {
MemRegion mr(begin, end);
VerifyCleanCardClosure verify_blk(gen_boundary, begin, end);
for (HeapWord* cur = start_block; cur < end; cur += s->block_size(cur)) {
if (s->block_is_obj(cur) && s->obj_is_alive(cur)) {
oop(cur)->oop_iterate(&verify_blk, mr);
}
}
cur += s->block_size(cur);
}
cur_entry = first_dirty;
} else {
......
......@@ -818,9 +818,14 @@ size_t ContiguousSpace::block_size(const HeapWord* p) const {
// This version requires locking.
inline HeapWord* ContiguousSpace::allocate_impl(size_t size,
HeapWord* const end_value) {
// In G1 there are places where a GC worker can allocates into a
// region using this serial allocation code without being prone to a
// race with other GC workers (we ensure that no other GC worker can
// access the same region at the same time). So the assert below is
// too strong in the case of G1.
assert(Heap_lock->owned_by_self() ||
(SafepointSynchronize::is_at_safepoint() &&
Thread::current()->is_VM_thread()),
(Thread::current()->is_VM_thread() || UseG1GC)),
"not locked");
HeapWord* obj = top();
if (pointer_delta(end_value, obj) >= size) {
......
......@@ -245,13 +245,13 @@ int constantPoolKlass::oop_oop_iterate_m(oop obj, OopClosure* blk, MemRegion mr)
}
oop* addr;
addr = cp->tags_addr();
blk->do_oop(addr);
if (mr.contains(addr)) blk->do_oop(addr);
addr = cp->cache_addr();
blk->do_oop(addr);
if (mr.contains(addr)) blk->do_oop(addr);
addr = cp->operands_addr();
blk->do_oop(addr);
if (mr.contains(addr)) blk->do_oop(addr);
addr = cp->pool_holder_addr();
blk->do_oop(addr);
if (mr.contains(addr)) blk->do_oop(addr);
return size;
}
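The guard added above (only call the closure when the field's address is inside the MemRegion) is what keeps a bounded iteration bounded. A self-contained sketch of that check with toy types in place of the constant pool:

#include <cstddef>

struct MemRegionModel {
  const void* start;
  const void* end;
  bool contains(const void* addr) const { return addr >= start && addr < end; }
};

struct Closure {
  int visited;
  void do_oop(void** addr) { (void) addr; visited++; }
};

// A toy "object" with several pointer fields, standing in for the constant
// pool's tags/cache/operands/pool_holder slots.
struct ToyObject { void* fields[4]; };

void iterate_bounded(ToyObject* obj, Closure* blk, const MemRegionModel& mr) {
  for (int i = 0; i < 4; i++) {
    void** addr = &obj->fields[i];
    if (mr.contains(addr)) {            // the guard the patch adds
      blk->do_oop(addr);
    }
  }
}

int main() {
  ToyObject obj = { { NULL, NULL, NULL, NULL } };
  Closure cl = { 0 };
  // A region covering only the first two fields.
  MemRegionModel mr = { &obj.fields[0], &obj.fields[2] };
  iterate_bounded(&obj, &cl, mr);
  return cl.visited == 2 ? 0 : 1;
}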
......
......@@ -1924,7 +1924,7 @@ class CommandLineFlags {
experimental(intx, WorkStealingSleepMillis, 1, \
"Sleep time when sleep is used for yields") \
\
experimental(uintx, WorkStealingYieldsBeforeSleep, 1000, \
experimental(uintx, WorkStealingYieldsBeforeSleep, 5000, \
"Number of yields before a sleep is done during workstealing") \
\
experimental(uintx, WorkStealingHardSpins, 4096, \
......