Commit 88ac75fa authored by jmasa

Merge

......@@ -29,13 +29,19 @@
// Defines Linux specific flags. They are not available on other platforms.
//
#define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \
product(bool, UseOprofile, false, \
"enable support for Oprofile profiler") \
\
product(bool, UseLinuxPosixThreadCPUClocks, true, \
"enable fast Linux Posix clocks where available")
// NB: The default value of UseLinuxPosixThreadCPUClocks may be
// overridden in Arguments::parse_each_vm_init_arg.
product(bool, UseOprofile, false, \
"enable support for Oprofile profiler") \
\
product(bool, UseLinuxPosixThreadCPUClocks, true, \
"enable fast Linux Posix clocks where available") \
/* NB: The default value of UseLinuxPosixThreadCPUClocks may be \
overridden in Arguments::parse_each_vm_init_arg. */ \
\
product(bool, UseHugeTLBFS, false, \
"Use MAP_HUGETLB for large pages") \
\
product(bool, UseSHM, false, \
"Use SYSV shared memory for large pages")
//
// Defines Linux-specific default values. The flags are available on all
......
......@@ -2465,16 +2465,40 @@ bool os::commit_memory(char* addr, size_t size, bool exec) {
return res != (uintptr_t) MAP_FAILED;
}
// Define MAP_HUGETLB here so we can build HotSpot on old systems.
#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000
#endif
// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
#ifndef MADV_HUGEPAGE
#define MADV_HUGEPAGE 14
#endif
bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
bool exec) {
if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
uintptr_t res =
(uintptr_t) ::mmap(addr, size, prot,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
-1, 0);
return res != (uintptr_t) MAP_FAILED;
}
return commit_memory(addr, size, exec);
}
void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
// We don't check the return value: madvise(MADV_HUGEPAGE) may not
// be supported or the memory may already be backed by huge pages.
::madvise(addr, bytes, MADV_HUGEPAGE);
}
}
void os::free_memory(char *addr, size_t bytes) {
::mmap(addr, bytes, PROT_READ | PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
::madvise(addr, bytes, MADV_DONTNEED);
}
void os::numa_make_global(char *addr, size_t bytes) {
......@@ -2812,6 +2836,43 @@ bool os::unguard_memory(char* addr, size_t size) {
return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
}
bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
bool result = false;
void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
-1, 0);
if (p != (void *) -1) {
// We don't know if this really is a huge page or not.
FILE *fp = fopen("/proc/self/maps", "r");
if (fp) {
while (!feof(fp)) {
char chars[257];
long x = 0;
if (fgets(chars, sizeof(chars), fp)) {
if (sscanf(chars, "%lx-%*lx", &x) == 1
&& x == (long)p) {
if (strstr (chars, "hugepage")) {
result = true;
break;
}
}
}
}
fclose(fp);
}
munmap (p, page_size);
if (result)
return true;
}
if (warn) {
warning("HugeTLBFS is not supported by the operating system.");
}
return result;
}
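The sanity check above maps a single page with MAP_HUGETLB and then scans /proc/self/maps to confirm the kernel really backed it with huge pages. Below is a minimal standalone sketch of the same probe, not part of the commit; the helper name `probe_hugetlb` and the 2 MB page size are assumptions for illustration only.

```cpp
#include <sys/mman.h>
#include <cstddef>
#include <cstdio>
#include <cstring>

#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000   // same fallback definition used in the patch
#endif

// Hypothetical helper: maps 'page_size' bytes with MAP_HUGETLB and checks
// /proc/self/maps to see whether the mapping is huge-page backed.
static bool probe_hugetlb(size_t page_size) {
  void* p = ::mmap(NULL, page_size, PROT_READ | PROT_WRITE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_HUGETLB, -1, 0);
  if (p == MAP_FAILED) return false;   // kernel refused the huge-page mapping

  bool huge = false;
  if (std::FILE* fp = std::fopen("/proc/self/maps", "r")) {
    char line[257];
    while (std::fgets(line, sizeof(line), fp)) {
      unsigned long start = 0;
      // The first field of each maps line is "start-end"; match our mapping
      // and look for a "hugepage" backing name, as the patch does.
      if (std::sscanf(line, "%lx-%*lx", &start) == 1 &&
          start == (unsigned long)p && std::strstr(line, "hugepage")) {
        huge = true;
        break;
      }
    }
    std::fclose(fp);
  }
  ::munmap(p, page_size);
  return huge;
}

int main() {
  const size_t two_mb = 2 * 1024 * 1024;   // assumed default huge page size
  std::printf("HugeTLBFS usable: %s\n", probe_hugetlb(two_mb) ? "yes" : "no");
  return 0;
}
```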
/*
* Set the coredump_filter bits to include largepages in core dump (bit 6)
*
......@@ -2854,7 +2915,16 @@ static void set_coredump_filter(void) {
static size_t _large_page_size = 0;
bool os::large_page_init() {
if (!UseLargePages) return false;
if (!UseLargePages) {
UseHugeTLBFS = false;
UseSHM = false;
return false;
}
if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) {
// Our user has not expressed a preference, so we'll try both.
UseHugeTLBFS = UseSHM = true;
}
if (LargePageSizeInBytes) {
_large_page_size = LargePageSizeInBytes;
......@@ -2899,6 +2969,9 @@ bool os::large_page_init() {
}
}
// print a warning if any large page related flag is specified on command line
bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
const size_t default_page_size = (size_t)Linux::page_size();
if (_large_page_size > default_page_size) {
_page_sizes[0] = _large_page_size;
......@@ -2906,6 +2979,14 @@ bool os::large_page_init() {
_page_sizes[2] = 0;
}
UseHugeTLBFS = UseHugeTLBFS &&
Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
if (UseHugeTLBFS)
UseSHM = false;
UseLargePages = UseHugeTLBFS || UseSHM;
set_coredump_filter();
// Large page support is available on 2.6 or newer kernel, some vendors
......@@ -2922,7 +3003,7 @@ bool os::large_page_init() {
char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
// "exec" is passed in but not used. Creating the shared image for
// the code cache doesn't have an SHM_X executable permission to check.
assert(UseLargePages, "only for large pages");
assert(UseLargePages && UseSHM, "only for SHM large pages");
key_t key = IPC_PRIVATE;
char *addr;
......@@ -2989,16 +3070,15 @@ size_t os::large_page_size() {
return _large_page_size;
}
// Linux does not support anonymous mmap with large page memory. The only way
// to reserve large page memory without file backing is through SysV shared
// memory API. The entire memory region is committed and pinned upfront.
// Hopefully this will change in the future...
// HugeTLBFS allows application to commit large page memory on demand;
// with SysV SHM the entire memory region must be allocated as shared
// memory.
bool os::can_commit_large_page_memory() {
return false;
return UseHugeTLBFS;
}
bool os::can_execute_large_page_memory() {
return false;
return UseHugeTLBFS;
}
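The change above reflects the difference between the two large-page mechanisms: with HugeTLBFS, mmap(MAP_HUGETLB) can commit (and execute) large-page memory region by region, while SysV SHM allocates and pins the whole segment up front. A hedged sketch contrasting the two reservation styles, with helper names and flag choices picked for illustration only:

```cpp
#include <sys/mman.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <cstddef>

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000   // fallback for old headers, mirroring HotSpot's guard
#endif

// HugeTLBFS style: commit large pages on demand with a plain mmap;
// 'exec' mirrors the exec flag of os::commit_memory.
static char* reserve_hugetlb(size_t bytes, bool exec) {
  int prot = exec ? PROT_READ | PROT_WRITE | PROT_EXEC
                  : PROT_READ | PROT_WRITE;
  void* p = ::mmap(NULL, bytes, prot,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
  return p == MAP_FAILED ? NULL : (char*)p;
}

// SysV SHM style: the entire segment is allocated (and pinned) up front.
static char* reserve_shm_large_pages(size_t bytes) {
  int shmid = ::shmget(IPC_PRIVATE, bytes,
                       SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
  if (shmid == -1) return NULL;
  void* p = ::shmat(shmid, NULL, 0);
  ::shmctl(shmid, IPC_RMID, NULL);   // delete once the last user detaches
  return p == (void*)-1 ? NULL : (char*)p;
}
```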
// Reserve memory at an arbitrary address, only if that area is
......
......@@ -86,6 +86,9 @@ class Linux {
static void rebuild_cpu_to_node_map();
static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; }
static bool hugetlbfs_sanity_check(bool warn, size_t page_size);
public:
static void init_thread_fpu_state();
static int get_fpu_control_word();
......
......@@ -1161,6 +1161,7 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,
TraceTime t(system_gc ? "Full GC (System.gc())" : "Full GC",
PrintGC, true, gclog_or_tty);
TraceCollectorStats tcs(g1mm()->full_collection_counters());
TraceMemoryManagerStats tms(true /* fullGC */);
double start = os::elapsedTime();
......@@ -1339,6 +1340,7 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,
if (PrintHeapAtGC) {
Universe::print_heap_after_gc();
}
g1mm()->update_counters();
return true;
}
......@@ -1971,6 +1973,10 @@ jint G1CollectedHeap::initialize() {
init_mutator_alloc_region();
// Create the monitoring and management support here so that
// values in the heap have been properly initialized.
_g1mm = new G1MonitoringSupport(this, &_g1_storage);
return JNI_OK;
}
......@@ -2113,6 +2119,28 @@ bool G1CollectedHeap::should_do_concurrent_full_gc(GCCause::Cause cause) {
(cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent));
}
#ifndef PRODUCT
void G1CollectedHeap::allocate_dummy_regions() {
// Let's fill up most of the region
size_t word_size = HeapRegion::GrainWords - 1024;
// And as a result the region we'll allocate will be humongous.
guarantee(isHumongous(word_size), "sanity");
for (uintx i = 0; i < G1DummyRegionsPerGC; ++i) {
// Let's use the existing mechanism for the allocation
HeapWord* dummy_obj = humongous_obj_allocate(word_size);
if (dummy_obj != NULL) {
MemRegion mr(dummy_obj, word_size);
CollectedHeap::fill_with_object(mr);
} else {
// If we can't allocate once, we probably cannot allocate
// again. Let's get out of the loop.
break;
}
}
}
#endif // !PRODUCT
void G1CollectedHeap::increment_full_collections_completed(bool concurrent) {
MonitorLockerEx x(FullGCCount_lock, Mutex::_no_safepoint_check_flag);
......@@ -3164,6 +3192,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty);
TraceCollectorStats tcs(g1mm()->incremental_collection_counters());
TraceMemoryManagerStats tms(false /* fullGC */);
// If the secondary_free_list is not empty, append it to the
......@@ -3338,6 +3367,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
doConcurrentMark();
}
allocate_dummy_regions();
#if YOUNG_LIST_VERBOSE
gclog_or_tty->print_cr("\nEnd of the pause.\nYoung_list:");
_young_list->print();
......@@ -3401,6 +3432,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
if (PrintHeapAtGC) {
Universe::print_heap_after_gc();
}
g1mm()->update_counters();
if (G1SummarizeRSetStats &&
(G1SummarizeRSetStatsPeriod > 0) &&
(total_collections() % G1SummarizeRSetStatsPeriod == 0)) {
......@@ -5314,6 +5347,7 @@ HeapRegion* G1CollectedHeap::new_mutator_alloc_region(size_t word_size,
if (new_alloc_region != NULL) {
g1_policy()->update_region_num(true /* next_is_young */);
set_region_short_lived_locked(new_alloc_region);
g1mm()->update_eden_counters();
return new_alloc_region;
}
}
......
......@@ -28,7 +28,9 @@
#include "gc_implementation/g1/concurrentMark.hpp"
#include "gc_implementation/g1/g1AllocRegion.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/g1MonitoringSupport.hpp"
#include "gc_implementation/g1/heapRegionSets.hpp"
#include "gc_implementation/shared/hSpaceCounters.hpp"
#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
#include "memory/barrierSet.hpp"
#include "memory/memRegion.hpp"
......@@ -57,6 +59,7 @@ class HeapRegionRemSetIterator;
class ConcurrentMark;
class ConcurrentMarkThread;
class ConcurrentG1Refine;
class GenerationCounters;
typedef OverflowTaskQueue<StarTask> RefToScanQueue;
typedef GenericTaskQueueSet<RefToScanQueue> RefToScanQueueSet;
......@@ -236,6 +239,9 @@ private:
// current collection.
HeapRegion* _gc_alloc_region_list;
// Helper for monitoring and management support.
G1MonitoringSupport* _g1mm;
// Determines PLAB size for a particular allocation purpose.
static size_t desired_plab_sz(GCAllocPurpose purpose);
......@@ -298,6 +304,14 @@ private:
// started is maintained in _total_full_collections in CollectedHeap.
volatile unsigned int _full_collections_completed;
// This is a non-product method that is helpful for testing. It is
// called at the end of a GC and artificially expands the heap by
// allocating a number of dead regions. This way we can induce very
// frequent marking cycles and stress the cleanup / concurrent
// cleanup code more (as all the regions that will be allocated by
// this method will be found dead by the marking cycle).
void allocate_dummy_regions() PRODUCT_RETURN;
// These are macros so that, if the assert fires, we get the correct
// line number, file, etc.
......@@ -542,6 +556,9 @@ protected:
HeapWord* expand_and_allocate(size_t word_size);
public:
G1MonitoringSupport* g1mm() { return _g1mm; }
// Expand the garbage-first heap by at least the given size (in bytes!).
// Returns true if the heap was expanded by the requested amount;
// false otherwise.
......
/*
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "gc_implementation/g1/g1MonitoringSupport.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
G1MonitoringSupport::G1MonitoringSupport(G1CollectedHeap* g1h,
VirtualSpace* g1_storage_addr) :
_g1h(g1h),
_incremental_collection_counters(NULL),
_full_collection_counters(NULL),
_non_young_collection_counters(NULL),
_old_space_counters(NULL),
_young_collection_counters(NULL),
_eden_counters(NULL),
_from_counters(NULL),
_to_counters(NULL),
_g1_storage_addr(g1_storage_addr)
{
// Counters for GC collections
//
// name "collector.0". In a generational collector this would be the
// young generation collection.
_incremental_collection_counters =
new CollectorCounters("G1 incremental collections", 0);
// name "collector.1". In a generational collector this would be the
// old generation collection.
_full_collection_counters =
new CollectorCounters("G1 stop-the-world full collections", 1);
// timer sampling for all counters supporting sampling only update the
// used value. See the take_sample() method. G1 requires both used and
// capacity updated so sampling is not currently used. It might
// be sufficient to update all counters in take_sample() even though
// take_sample() only returns "used". When sampling was used, there
// were some anomalous values emitted which may have been the consequence
// of not updating all values simultaneously (i.e., see the calculation done
// in eden_space_used(), is it possible that the values used to
// calculate either eden_used or survivor_used are being updated by
// the collector when the sample is being done?).
const bool sampled = false;
// "Generation" and "Space" counters.
//
// name "generation.1" This is logically the old generation in
// generational GC terms. The "1, 1" parameters are for
// the n-th generation (=1) with 1 space.
// Counters are created from minCapacity, maxCapacity, and capacity
_non_young_collection_counters =
new GenerationCounters("whole heap", 1, 1, _g1_storage_addr);
// name "generation.1.space.0"
// Counters are created from maxCapacity, capacity, initCapacity,
// and used.
_old_space_counters = new HSpaceCounters("space", 0,
_g1h->max_capacity(), _g1h->capacity(), _non_young_collection_counters);
// Young collection set
// name "generation.0". This is logically the young generation.
// The "0, 3" are paremeters for the n-th genertaion (=0) with 3 spaces.
// See _non_young_collection_counters for additional counters
_young_collection_counters = new GenerationCounters("young", 0, 3, NULL);
// Replace "max_heap_byte_size() with maximum young gen size for
// g1Collectedheap
// name "generation.0.space.0"
// See _old_space_counters for additional counters
_eden_counters = new HSpaceCounters("eden", 0,
_g1h->max_capacity(), eden_space_committed(),
_young_collection_counters);
// name "generation.0.space.1"
// See _old_space_counters for additional counters
// Set the arguments to indicate that this survivor space is not used.
_from_counters = new HSpaceCounters("s0", 1, (long) 0, (long) 0,
_young_collection_counters);
// name "generation.0.space.2"
// See _old_space_counters for additional counters
_to_counters = new HSpaceCounters("s1", 2,
_g1h->max_capacity(),
survivor_space_committed(),
_young_collection_counters);
}
size_t G1MonitoringSupport::overall_committed() {
return g1h()->capacity();
}
size_t G1MonitoringSupport::overall_used() {
return g1h()->used_unlocked();
}
size_t G1MonitoringSupport::eden_space_committed() {
return MAX2(eden_space_used(), (size_t) HeapRegion::GrainBytes);
}
size_t G1MonitoringSupport::eden_space_used() {
size_t young_list_length = g1h()->young_list()->length();
size_t eden_used = young_list_length * HeapRegion::GrainBytes;
size_t survivor_used = survivor_space_used();
eden_used = subtract_up_to_zero(eden_used, survivor_used);
return eden_used;
}
size_t G1MonitoringSupport::survivor_space_committed() {
return MAX2(survivor_space_used(),
(size_t) HeapRegion::GrainBytes);
}
size_t G1MonitoringSupport::survivor_space_used() {
size_t survivor_num = g1h()->g1_policy()->recorded_survivor_regions();
size_t survivor_used = survivor_num * HeapRegion::GrainBytes;
return survivor_used;
}
size_t G1MonitoringSupport::old_space_committed() {
size_t committed = overall_committed();
size_t eden_committed = eden_space_committed();
size_t survivor_committed = survivor_space_committed();
committed = subtract_up_to_zero(committed, eden_committed);
committed = subtract_up_to_zero(committed, survivor_committed);
committed = MAX2(committed, (size_t) HeapRegion::GrainBytes);
return committed;
}
// See the comment near the top of g1MonitoringSupport.hpp for
// an explanation of these calculations for "used" and "capacity".
size_t G1MonitoringSupport::old_space_used() {
size_t used = overall_used();
size_t eden_used = eden_space_used();
size_t survivor_used = survivor_space_used();
used = subtract_up_to_zero(used, eden_used);
used = subtract_up_to_zero(used, survivor_used);
return used;
}
void G1MonitoringSupport::update_counters() {
if (UsePerfData) {
eden_counters()->update_capacity(eden_space_committed());
eden_counters()->update_used(eden_space_used());
to_counters()->update_capacity(survivor_space_committed());
to_counters()->update_used(survivor_space_used());
old_space_counters()->update_capacity(old_space_committed());
old_space_counters()->update_used(old_space_used());
non_young_collection_counters()->update_all();
}
}
void G1MonitoringSupport::update_eden_counters() {
if (UsePerfData) {
eden_counters()->update_capacity(eden_space_committed());
eden_counters()->update_used(eden_space_used());
}
}
/*
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1MONITORINGSUPPORT_HPP
#define SHARE_VM_GC_IMPLEMENTATION_G1_G1MONITORINGSUPPORT_HPP
#include "gc_implementation/shared/hSpaceCounters.hpp"
class G1CollectedHeap;
class G1SpaceMonitoringSupport;
// Class for monitoring logical spaces in G1.
// G1 defines a set of regions as a young
// collection (analogous to a young generation).
// The young collection is a logical generation
// with no fixed chunk (see space.hpp) reflecting
// the address space for the generation. In addition
// to the young collection there is its complement
// the non-young collection that is simply the regions
// not in the young collection. The non-young collection
// is treated here as a logical old generation only
// because the monitoring tools expect a generational
// heap. The monitoring tools expect that a Space
// (see space.hpp) exists that describes the
// address space of young collection and non-young
// collection and such a view is provided here.
//
// This class provides interfaces to access
// the value of variables for the young collection
// that include the "capacity" and "used" of the
// young collection along with constant values
// for the minimum and maximum capacities for
// the logical spaces. Similarly for the non-young
// collection.
//
// Also provided are counters for G1 concurrent collections
// and stop-the-world full heap collections.
//
// Below is a description of how "used" and "capactiy"
// (or committed) is calculated for the logical spaces.
//
// 1) The used space calculation for a pool is not necessarily
// independent of the others. We can easily get from G1 the overall
// used space in the entire heap, the number of regions in the young
// generation (includes both eden and survivors), and the number of
// survivor regions. So, from that we calculate:
//
// survivor_used = survivor_num * region_size
// eden_used = young_region_num * region_size - survivor_used
// old_gen_used = overall_used - eden_used - survivor_used
//
// Note that survivor_used and eden_used are upper bounds. To get the
// actual value we would have to iterate over the regions and add up
// ->used(). But that'd be expensive. So, we'll accept some lack of
// accuracy for those two. But, we have to be careful when calculating
// old_gen_used, in case we subtract from overall_used more than the
// actual number and our result goes negative.
//
// 2) Calculating the used space is straightforward, as described
// above. However, how do we calculate the committed space, given that
// we allocate space for the eden, survivor, and old gen out of the
// same pool of regions? One way to do this is to use the used value
// as also the committed value for the eden and survivor spaces and
// then calculate the old gen committed space as follows:
//
// old_gen_committed = overall_committed - eden_committed - survivor_committed
//
// Maybe a better way to do that would be to calculate used for eden
// and survivor as a sum of ->used() over their regions and then
// calculate committed as region_num * region_size (i.e., what we use
// to calculate the used space now). This is something to consider
// in the future.
//
// 3) Another decision that is again not straightforward is what is
// the max size that each memory pool can grow to. One way to do this
// would be to use the committed size for the max for the eden and
// survivors and calculate the old gen max as follows (basically, it's
// a similar pattern to what we use for the committed space, as
// described above):
//
// old_gen_max = overall_max - eden_max - survivor_max
//
// Unfortunately, the above makes the max of each pool fluctuate over
// time and, even though this is allowed according to the spec, it
// broke several assumptions in the M&M framework (there were cases
// where used would reach a value greater than max). So, for max we
// use -1, which means "undefined" according to the spec.
//
// 4) Now, there is a very subtle issue with all the above. The
// framework will call get_memory_usage() on the three pools
// asynchronously. As a result, each call might get a different value
// for, say, survivor_num which will yield inconsistent values for
// eden_used, survivor_used, and old_gen_used (as survivor_num is used
// in the calculation of all three). This would normally be
// ok. However, it's possible that this might cause the sum of
// eden_used, survivor_used, and old_gen_used to go over the max heap
// size and this seems to sometimes cause JConsole (and maybe other
// clients) to get confused. There isn't really an easy / clean
// solution to this problem, due to the asynchronous nature of the
// framework.
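A small worked sketch of the arithmetic described in points 1) and 2) above, using made-up region counts and a 1 MB region size; all numbers are purely illustrative.

```cpp
#include <cstdio>
#include <cstddef>

// Same defensive subtraction as G1MonitoringSupport::subtract_up_to_zero.
static size_t sub_to_zero(size_t x, size_t y) { return x > y ? x - y : 0; }

int main() {
  const size_t region_size      = 1 * 1024 * 1024;  // assumed 1 MB regions
  const size_t young_region_num = 12;               // eden + survivors
  const size_t survivor_num     = 3;
  const size_t overall_used     = 40 * region_size; // whole-heap used

  size_t survivor_used = survivor_num * region_size;
  size_t eden_used     = sub_to_zero(young_region_num * region_size,
                                     survivor_used);
  size_t old_gen_used  = sub_to_zero(sub_to_zero(overall_used, eden_used),
                                     survivor_used);

  // eden_used = 12 MB - 3 MB = 9 MB; old_gen_used = 40 MB - 9 MB - 3 MB = 28 MB
  std::printf("eden=%zu survivor=%zu old=%zu\n",
              eden_used, survivor_used, old_gen_used);
  return 0;
}
```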
class G1MonitoringSupport : public CHeapObj {
G1CollectedHeap* _g1h;
VirtualSpace* _g1_storage_addr;
// jstat performance counters
// incremental collections both fully and partially young
CollectorCounters* _incremental_collection_counters;
// full stop-the-world collections
CollectorCounters* _full_collection_counters;
// young collection set counters. The _eden_counters,
// _from_counters, and _to_counters are associated with
// this "generational" counter.
GenerationCounters* _young_collection_counters;
// non-young collection set counters. The _old_space_counters
// below are associated with this "generational" counter.
GenerationCounters* _non_young_collection_counters;
// Counters for the capacity and used for
// the whole heap
HSpaceCounters* _old_space_counters;
// the young collection
HSpaceCounters* _eden_counters;
// the survivor collection (only one, _to_counters, is actively used)
HSpaceCounters* _from_counters;
HSpaceCounters* _to_counters;
// It returns x - y if x > y, 0 otherwise.
// As described in the comment above, some of the inputs to the
// calculations we have to do are obtained concurrently and hence
// may be inconsistent with each other. So, this provides a
// defensive way of performing the subtraction and avoids the value
// going negative (which would mean a very large result, given that
// the parameters are size_t).
static size_t subtract_up_to_zero(size_t x, size_t y) {
if (x > y) {
return x - y;
} else {
return 0;
}
}
public:
G1MonitoringSupport(G1CollectedHeap* g1h, VirtualSpace* g1_storage_addr);
G1CollectedHeap* g1h() { return _g1h; }
VirtualSpace* g1_storage_addr() { return _g1_storage_addr; }
// Performance Counter accessors
void update_counters();
void update_eden_counters();
CollectorCounters* incremental_collection_counters() {
return _incremental_collection_counters;
}
CollectorCounters* full_collection_counters() {
return _full_collection_counters;
}
GenerationCounters* non_young_collection_counters() {
return _non_young_collection_counters;
}
HSpaceCounters* old_space_counters() { return _old_space_counters; }
HSpaceCounters* eden_counters() { return _eden_counters; }
HSpaceCounters* from_counters() { return _from_counters; }
HSpaceCounters* to_counters() { return _to_counters; }
// Monitoring support used by
// MemoryService
// jstat counters
size_t overall_committed();
size_t overall_used();
size_t eden_space_committed();
size_t eden_space_used();
size_t survivor_space_committed();
size_t survivor_space_used();
size_t old_space_committed();
size_t old_space_used();
};
#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1MONITORINGSUPPORT_HPP
......@@ -300,6 +300,11 @@
develop(uintx, G1StressConcRegionFreeingDelayMillis, 0, \
"Artificial delay during concurrent region freeing") \
\
develop(uintx, G1DummyRegionsPerGC, 0, \
"The number of dummy regions G1 will allocate at the end of " \
"each evacuation pause in order to artificially fill up the " \
"heap and stress the marking implementation.") \
\
develop(bool, ReduceInitialCardMarksForG1, false, \
"When ReduceInitialCardMarks is true, this flag setting " \
" controls whether G1 allows the RICM optimization") \
......
......@@ -33,44 +33,43 @@
#include "runtime/mutexLocker.hpp"
#include "runtime/virtualspace.hpp"
void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl,
int n_threads) {
if (n_threads > 0) {
assert((n_threads == 1 && ParallelGCThreads == 0) ||
n_threads <= (int)ParallelGCThreads,
"# worker threads != # requested!");
// Make sure the LNC array is valid for the space.
jbyte** lowest_non_clean;
uintptr_t lowest_non_clean_base_chunk_index;
size_t lowest_non_clean_chunk_size;
get_LNC_array_for_space(sp, lowest_non_clean,
lowest_non_clean_base_chunk_index,
lowest_non_clean_chunk_size);
void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
ClearNoncleanCardWrapper* cl,
int n_threads) {
assert(n_threads > 0, "Error: expected n_threads > 0");
assert((n_threads == 1 && ParallelGCThreads == 0) ||
n_threads <= (int)ParallelGCThreads,
"# worker threads != # requested!");
// Make sure the LNC array is valid for the space.
jbyte** lowest_non_clean;
uintptr_t lowest_non_clean_base_chunk_index;
size_t lowest_non_clean_chunk_size;
get_LNC_array_for_space(sp, lowest_non_clean,
lowest_non_clean_base_chunk_index,
lowest_non_clean_chunk_size);
int n_strides = n_threads * StridesPerThread;
SequentialSubTasksDone* pst = sp->par_seq_tasks();
pst->set_n_threads(n_threads);
pst->set_n_tasks(n_strides);
int n_strides = n_threads * StridesPerThread;
SequentialSubTasksDone* pst = sp->par_seq_tasks();
pst->set_n_threads(n_threads);
pst->set_n_tasks(n_strides);
int stride = 0;
while (!pst->is_task_claimed(/* reference */ stride)) {
process_stride(sp, mr, stride, n_strides, dcto_cl, cl,
lowest_non_clean,
lowest_non_clean_base_chunk_index,
lowest_non_clean_chunk_size);
}
if (pst->all_tasks_completed()) {
// Clear lowest_non_clean array for next time.
intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
uintptr_t last_chunk_index = addr_to_chunk_index(mr.last());
for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
intptr_t ind = ch - lowest_non_clean_base_chunk_index;
assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
"Bounds error");
lowest_non_clean[ind] = NULL;
}
int stride = 0;
while (!pst->is_task_claimed(/* reference */ stride)) {
process_stride(sp, mr, stride, n_strides, dcto_cl, cl,
lowest_non_clean,
lowest_non_clean_base_chunk_index,
lowest_non_clean_chunk_size);
}
if (pst->all_tasks_completed()) {
// Clear lowest_non_clean array for next time.
intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
uintptr_t last_chunk_index = addr_to_chunk_index(mr.last());
for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
intptr_t ind = ch - lowest_non_clean_base_chunk_index;
assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
"Bounds error");
lowest_non_clean[ind] = NULL;
}
}
}
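The parallel work method above splits the card range into n_threads * StridesPerThread strides and lets workers claim stride indices from a shared sub-task counter; each stride owns every n_strides-th chunk. A hedged, self-contained sketch of that claiming pattern using std::atomic (the chunk layout and counts are simplified for illustration, and the atomic counter stands in for SequentialSubTasksDone::is_task_claimed):

```cpp
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

// Each stride i owns chunks i, i + n_strides, i + 2 * n_strides, ...
static void process_strides(int n_threads, int strides_per_thread,
                            int total_chunks) {
  const int n_strides = n_threads * strides_per_thread;
  std::atomic<int> next_stride{0};

  auto worker = [&](int id) {
    int stride;
    // Claim stride indices until none are left, like is_task_claimed().
    while ((stride = next_stride.fetch_add(1)) < n_strides) {
      for (int chunk = stride; chunk < total_chunks; chunk += n_strides) {
        // A real implementation would scan the dirty cards of this chunk here.
        std::printf("worker %d: stride %d -> chunk %d\n", id, stride, chunk);
      }
    }
  };

  std::vector<std::thread> pool;
  for (int i = 0; i < n_threads; i++) pool.emplace_back(worker, i);
  for (auto& t : pool) t.join();
}

int main() {
  process_strides(/*n_threads=*/2, /*strides_per_thread=*/2, /*total_chunks=*/8);
  return 0;
}
```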
......@@ -81,7 +80,7 @@ process_stride(Space* sp,
MemRegion used,
jint stride, int n_strides,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl,
ClearNoncleanCardWrapper* cl,
jbyte** lowest_non_clean,
uintptr_t lowest_non_clean_base_chunk_index,
size_t lowest_non_clean_chunk_size) {
......@@ -127,7 +126,11 @@ process_stride(Space* sp,
lowest_non_clean_base_chunk_index,
lowest_non_clean_chunk_size);
non_clean_card_iterate_work(chunk_mr, cl);
// We do not call the non_clean_card_iterate_serial() version because
// we want to clear the cards, and the ClearNoncleanCardWrapper closure
// itself does the work of finding contiguous dirty ranges of cards to
// process (and clear).
cl->do_MemRegion(chunk_mr);
// Find the next chunk of the stride.
chunk_card_start += CardsPerStrideChunk * n_strides;
......
/*
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -51,15 +51,18 @@ GenerationCounters::GenerationCounters(const char* name,
cname = PerfDataManager::counter_name(_name_space, "minCapacity");
PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
_virtual_space == NULL ? 0 :
_virtual_space->committed_size(), CHECK);
cname = PerfDataManager::counter_name(_name_space, "maxCapacity");
PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
_virtual_space == NULL ? 0 :
_virtual_space->reserved_size(), CHECK);
cname = PerfDataManager::counter_name(_name_space, "capacity");
_current_size = PerfDataManager::create_variable(SUN_GC, cname,
PerfData::U_Bytes,
PerfData::U_Bytes,
_virtual_space == NULL ? 0 :
_virtual_space->committed_size(), CHECK);
}
}
/*
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -61,10 +61,11 @@ class GenerationCounters: public CHeapObj {
}
virtual void update_all() {
_current_size->set_value(_virtual_space->committed_size());
_current_size->set_value(_virtual_space == NULL ? 0 :
_virtual_space->committed_size());
}
const char* name_space() const { return _name_space; }
};
};
#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_GENERATIONCOUNTERS_HPP
/*
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "gc_implementation/shared/hSpaceCounters.hpp"
#include "memory/generation.hpp"
#include "memory/resourceArea.hpp"
HSpaceCounters::HSpaceCounters(const char* name,
int ordinal,
size_t max_size,
size_t initial_capacity,
GenerationCounters* gc) {
if (UsePerfData) {
EXCEPTION_MARK;
ResourceMark rm;
const char* cns =
PerfDataManager::name_space(gc->name_space(), "space", ordinal);
_name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1);
strcpy(_name_space, cns);
const char* cname = PerfDataManager::counter_name(_name_space, "name");
PerfDataManager::create_string_constant(SUN_GC, cname, name, CHECK);
cname = PerfDataManager::counter_name(_name_space, "maxCapacity");
PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
(jlong)max_size, CHECK);
cname = PerfDataManager::counter_name(_name_space, "capacity");
_capacity = PerfDataManager::create_variable(SUN_GC, cname,
PerfData::U_Bytes,
initial_capacity, CHECK);
cname = PerfDataManager::counter_name(_name_space, "used");
_used = PerfDataManager::create_variable(SUN_GC, cname, PerfData::U_Bytes,
(jlong) 0, CHECK);
cname = PerfDataManager::counter_name(_name_space, "initCapacity");
PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
initial_capacity, CHECK);
}
}
/*
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_HSPACECOUNTERS_HPP
#define SHARE_VM_GC_IMPLEMENTATION_SHARED_HSPACECOUNTERS_HPP
#ifndef SERIALGC
#include "gc_implementation/shared/generationCounters.hpp"
#include "memory/generation.hpp"
#include "runtime/perfData.hpp"
#endif
// HSpaceCounters is a holder class for performance counters
// that track collections (logical spaces) in a heap.
class HeapSpaceUsedHelper;
class G1SpaceMonitoringSupport;
class HSpaceCounters: public CHeapObj {
friend class VMStructs;
private:
PerfVariable* _capacity;
PerfVariable* _used;
// Constant PerfData types don't need to retain a reference.
// However, it's a good idea to document them here.
char* _name_space;
public:
HSpaceCounters(const char* name, int ordinal, size_t max_size,
size_t initial_capacity, GenerationCounters* gc);
~HSpaceCounters() {
if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space);
}
inline void update_capacity(size_t v) {
_capacity->set_value(v);
}
inline void update_used(size_t v) {
_used->set_value(v);
}
debug_only(
// for security reasons, we do not allow arbitrary reads from
// the counters as they may live in shared memory.
jlong used() {
return _used->get_value();
}
jlong capacity() {
return _capacity->get_value();
}
)
inline void update_all(size_t capacity, size_t used) {
update_capacity(capacity);
update_used(used);
}
const char* name_space() const { return _name_space; }
};
#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_HSPACECOUNTERS_HPP
......@@ -456,31 +456,35 @@ bool CardTableModRefBS::mark_card_deferred(size_t card_index) {
}
void CardTableModRefBS::non_clean_card_iterate(Space* sp,
MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl) {
void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp,
MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
ClearNoncleanCardWrapper* cl) {
if (!mr.is_empty()) {
int n_threads = SharedHeap::heap()->n_par_threads();
if (n_threads > 0) {
#ifndef SERIALGC
par_non_clean_card_iterate_work(sp, mr, dcto_cl, cl, n_threads);
non_clean_card_iterate_parallel_work(sp, mr, dcto_cl, cl, n_threads);
#else // SERIALGC
fatal("Parallel gc not supported here.");
#endif // SERIALGC
} else {
non_clean_card_iterate_work(mr, cl);
// We do not call the non_clean_card_iterate_serial() version below because
// we want to clear the cards (which non_clean_card_iterate_serial() does not
// do for us), and the ClearNoncleanCardWrapper closure itself does the work
// of finding contiguous dirty ranges of cards to process (and clear).
cl->do_MemRegion(mr);
}
}
}
// NOTE: For this to work correctly, it is important that
// we look for non-clean cards below (so as to catch those
// marked precleaned), rather than look explicitly for dirty
// cards (and miss those marked precleaned). In that sense,
// the name precleaned is currently somewhat of a misnomer.
void CardTableModRefBS::non_clean_card_iterate_work(MemRegion mr,
MemRegionClosure* cl) {
// The iterator itself is not MT-aware, but
// MT-aware callers and closures can use this to
// accomplish dirty card iteration in parallel. The
// iterator itself does not clear the dirty cards, or
// change their values in any manner.
void CardTableModRefBS::non_clean_card_iterate_serial(MemRegion mr,
MemRegionClosure* cl) {
for (int i = 0; i < _cur_covered_regions; i++) {
MemRegion mri = mr.intersection(_covered[i]);
if (mri.word_size() > 0) {
......@@ -661,7 +665,7 @@ public:
void CardTableModRefBS::verify_clean_region(MemRegion mr) {
GuaranteeNotModClosure blk(this);
non_clean_card_iterate_work(mr, &blk);
non_clean_card_iterate_serial(mr, &blk);
}
// To verify a MemRegion is entirely dirty this closure is passed to
......
......@@ -44,6 +44,7 @@
class Generation;
class OopsInGenClosure;
class DirtyCardToOopClosure;
class ClearNoncleanCardWrapper;
class CardTableModRefBS: public ModRefBarrierSet {
// Some classes get to look at some private stuff.
......@@ -165,22 +166,28 @@ class CardTableModRefBS: public ModRefBarrierSet {
// Iterate over the portion of the card-table which covers the given
// region mr in the given space and apply cl to any dirty sub-regions
// of mr. cl and dcto_cl must either be the same closure or cl must
// wrap dcto_cl. Both are required - neither may be NULL. Also, dcto_cl
// may be modified. Note that this function will operate in a parallel
// mode if worker threads are available.
void non_clean_card_iterate(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl);
// Utility function used to implement the other versions below.
void non_clean_card_iterate_work(MemRegion mr, MemRegionClosure* cl);
void par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl,
int n_threads);
// of mr. Dirty cards are _not_ cleared by the iterator method itself,
// but closures may arrange to do so on their own should they so wish.
void non_clean_card_iterate_serial(MemRegion mr, MemRegionClosure* cl);
// A variant of the above that will operate in a parallel mode if
// worker threads are available, and clear the dirty cards as it
// processes them.
// ClearNoncleanCardWrapper cl must wrap the DirtyCardToOopClosure dcto_cl,
// which may itself be modified by the method.
void non_clean_card_iterate_possibly_parallel(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
ClearNoncleanCardWrapper* cl);
private:
// Work method used to implement non_clean_card_iterate_possibly_parallel()
// above in the parallel case.
void non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
ClearNoncleanCardWrapper* cl,
int n_threads);
protected:
// Dirty the bytes corresponding to "mr" (not all of which must be
// covered.)
void dirty_MemRegion(MemRegion mr);
......@@ -237,7 +244,7 @@ class CardTableModRefBS: public ModRefBarrierSet {
MemRegion used,
jint stride, int n_strides,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl,
ClearNoncleanCardWrapper* cl,
jbyte** lowest_non_clean,
uintptr_t lowest_non_clean_base_chunk_index,
size_t lowest_non_clean_chunk_size);
......@@ -409,14 +416,14 @@ public:
// marking, where a dirty card may cause scanning, and summarization
// marking, of objects that extend onto subsequent cards.)
void mod_card_iterate(MemRegionClosure* cl) {
non_clean_card_iterate_work(_whole_heap, cl);
non_clean_card_iterate_serial(_whole_heap, cl);
}
// Like the "mod_cards_iterate" above, except only invokes the closure
// for cards within the MemRegion "mr" (which is required to be
// card-aligned and sized.)
void mod_card_iterate(MemRegion mr, MemRegionClosure* cl) {
non_clean_card_iterate_work(mr, cl);
non_clean_card_iterate_serial(mr, cl);
}
static uintx ct_max_alignment_constraint();
......@@ -493,4 +500,5 @@ public:
void set_CTRS(CardTableRS* rs) { _rs = rs; }
};
#endif // SHARE_VM_MEMORY_CARDTABLEMODREFBS_HPP
......@@ -105,107 +105,111 @@ void CardTableRS::younger_refs_iterate(Generation* g,
g->younger_refs_iterate(blk);
}
class ClearNoncleanCardWrapper: public MemRegionClosure {
MemRegionClosure* _dirty_card_closure;
CardTableRS* _ct;
bool _is_par;
private:
// Clears the given card, return true if the corresponding card should be
// processed.
bool clear_card(jbyte* entry) {
if (_is_par) {
while (true) {
// In the parallel case, we may have to do this several times.
jbyte entry_val = *entry;
assert(entry_val != CardTableRS::clean_card_val(),
"We shouldn't be looking at clean cards, and this should "
"be the only place they get cleaned.");
if (CardTableRS::card_is_dirty_wrt_gen_iter(entry_val)
|| _ct->is_prev_youngergen_card_val(entry_val)) {
jbyte res =
Atomic::cmpxchg(CardTableRS::clean_card_val(), entry, entry_val);
if (res == entry_val) {
break;
} else {
assert(res == CardTableRS::cur_youngergen_and_prev_nonclean_card,
"The CAS above should only fail if another thread did "
"a GC write barrier.");
}
} else if (entry_val ==
CardTableRS::cur_youngergen_and_prev_nonclean_card) {
// Parallelism shouldn't matter in this case. Only the thread
// assigned to scan the card should change this value.
*entry = _ct->cur_youngergen_card_val();
break;
} else {
assert(entry_val == _ct->cur_youngergen_card_val(),
"Should be the only possibility.");
// In this case, the card was clean before, and became
// cur_youngergen only because of processing of a promoted object.
// We don't have to look at the card.
return false;
}
inline bool ClearNoncleanCardWrapper::clear_card(jbyte* entry) {
if (_is_par) {
return clear_card_parallel(entry);
} else {
return clear_card_serial(entry);
}
}
inline bool ClearNoncleanCardWrapper::clear_card_parallel(jbyte* entry) {
while (true) {
// In the parallel case, we may have to do this several times.
jbyte entry_val = *entry;
assert(entry_val != CardTableRS::clean_card_val(),
"We shouldn't be looking at clean cards, and this should "
"be the only place they get cleaned.");
if (CardTableRS::card_is_dirty_wrt_gen_iter(entry_val)
|| _ct->is_prev_youngergen_card_val(entry_val)) {
jbyte res =
Atomic::cmpxchg(CardTableRS::clean_card_val(), entry, entry_val);
if (res == entry_val) {
break;
} else {
assert(res == CardTableRS::cur_youngergen_and_prev_nonclean_card,
"The CAS above should only fail if another thread did "
"a GC write barrier.");
}
return true;
} else if (entry_val ==
CardTableRS::cur_youngergen_and_prev_nonclean_card) {
// Parallelism shouldn't matter in this case. Only the thread
// assigned to scan the card should change this value.
*entry = _ct->cur_youngergen_card_val();
break;
} else {
jbyte entry_val = *entry;
assert(entry_val != CardTableRS::clean_card_val(),
"We shouldn't be looking at clean cards, and this should "
"be the only place they get cleaned.");
assert(entry_val != CardTableRS::cur_youngergen_and_prev_nonclean_card,
"This should be possible in the sequential case.");
*entry = CardTableRS::clean_card_val();
return true;
assert(entry_val == _ct->cur_youngergen_card_val(),
"Should be the only possibility.");
// In this case, the card was clean before, and became
// cur_youngergen only because of processing of a promoted object.
// We don't have to look at the card.
return false;
}
}
return true;
}
public:
ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure,
CardTableRS* ct) :
inline bool ClearNoncleanCardWrapper::clear_card_serial(jbyte* entry) {
jbyte entry_val = *entry;
assert(entry_val != CardTableRS::clean_card_val(),
"We shouldn't be looking at clean cards, and this should "
"be the only place they get cleaned.");
assert(entry_val != CardTableRS::cur_youngergen_and_prev_nonclean_card,
"This should be possible in the sequential case.");
*entry = CardTableRS::clean_card_val();
return true;
}
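The parallel variant of clear_card() above relies on a compare-and-swap retry loop so that a concurrent GC write barrier can race with the clearing thread without a dirty mark being lost. A minimal sketch of the same retry pattern with std::atomic; the card values here are invented for illustration, and HotSpot itself uses Atomic::cmpxchg on raw jbyte card entries with several young-generation card encodings not modeled here.

```cpp
#include <atomic>
#include <cstdint>

// Illustrative card values; the real encodings live in CardTableRS.
constexpr int8_t kCleanCard = -1;
constexpr int8_t kDirtyCard =  0;

// Try to clear one card. Returns true if this thread should go on to
// process the card, false if it turned out to need no work from us.
static bool clear_card_cas(std::atomic<int8_t>* entry) {
  int8_t observed = entry->load(std::memory_order_relaxed);
  while (observed != kCleanCard) {
    // On CAS failure 'observed' is refreshed with the value written by the
    // racing thread (e.g. a write barrier re-dirtying the card) and we retry,
    // mirroring the loop in clear_card_parallel().
    if (entry->compare_exchange_weak(observed, kCleanCard,
                                     std::memory_order_acq_rel)) {
      return true;
    }
  }
  return false;   // already clean: nothing left for us to do
}
```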
ClearNoncleanCardWrapper::ClearNoncleanCardWrapper(
MemRegionClosure* dirty_card_closure, CardTableRS* ct) :
_dirty_card_closure(dirty_card_closure), _ct(ct) {
_is_par = (SharedHeap::heap()->n_par_threads() > 0);
}
void do_MemRegion(MemRegion mr) {
// We start at the high end of "mr", walking backwards
// while accumulating a contiguous dirty range of cards in
// [start_of_non_clean, end_of_non_clean) which we then
// process en masse.
HeapWord* end_of_non_clean = mr.end();
HeapWord* start_of_non_clean = end_of_non_clean;
jbyte* entry = _ct->byte_for(mr.last());
const jbyte* first_entry = _ct->byte_for(mr.start());
while (entry >= first_entry) {
HeapWord* cur = _ct->addr_for(entry);
if (!clear_card(entry)) {
// We hit a clean card; process any non-empty
// dirty range accumulated so far.
if (start_of_non_clean < end_of_non_clean) {
MemRegion mr2(start_of_non_clean, end_of_non_clean);
_dirty_card_closure->do_MemRegion(mr2);
}
// Reset the dirty window while continuing to
// look for the next dirty window to process.
end_of_non_clean = cur;
start_of_non_clean = end_of_non_clean;
}
void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) {
assert(mr.word_size() > 0, "Error");
assert(_ct->is_aligned(mr.start()), "mr.start() should be card aligned");
// mr.end() may not necessarily be card aligned.
jbyte* cur_entry = _ct->byte_for(mr.last());
const jbyte* limit = _ct->byte_for(mr.start());
HeapWord* end_of_non_clean = mr.end();
HeapWord* start_of_non_clean = end_of_non_clean;
while (cur_entry >= limit) {
HeapWord* cur_hw = _ct->addr_for(cur_entry);
if ((*cur_entry != CardTableRS::clean_card_val()) && clear_card(cur_entry)) {
// Continue the dirty range by opening the
// dirty window one card to the left.
start_of_non_clean = cur_hw;
} else {
// We hit a "clean" card; process any non-empty
// "dirty" range accumulated so far.
if (start_of_non_clean < end_of_non_clean) {
const MemRegion mrd(start_of_non_clean, end_of_non_clean);
_dirty_card_closure->do_MemRegion(mrd);
}
// Open the left end of the window one card to the left.
start_of_non_clean = cur;
// Note that "entry" leads "start_of_non_clean" in
// its leftward excursion after this point
// in the loop and, when we hit the left end of "mr",
// will point off of the left end of the card-table
// for "mr".
entry--;
}
// If the first card of "mr" was dirty, we will have
// been left with a dirty window, co-initial with "mr",
// which we now process.
if (start_of_non_clean < end_of_non_clean) {
MemRegion mr2(start_of_non_clean, end_of_non_clean);
_dirty_card_closure->do_MemRegion(mr2);
// Reset the dirty window, while continuing to look
// for the next dirty card that will start a
// new dirty window.
end_of_non_clean = cur_hw;
start_of_non_clean = cur_hw;
}
// Note that "cur_entry" leads "start_of_non_clean" in
// its leftward excursion after this point
// in the loop and, when we hit the left end of "mr",
// will point off of the left end of the card-table
// for "mr".
cur_entry--;
}
};
// If the first card of "mr" was dirty, we will have
// been left with a dirty window, co-initial with "mr",
// which we now process.
if (start_of_non_clean < end_of_non_clean) {
const MemRegion mrd(start_of_non_clean, end_of_non_clean);
_dirty_card_closure->do_MemRegion(mrd);
}
}
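do_MemRegion() above walks the card table from right to left, growing a [start_of_non_clean, end_of_non_clean) window while cards stay dirty, flushing it whenever a clean card is hit, and flushing once more at the end if the leftmost card was dirty. A hedged sketch of the same windowing idea over a plain array of card bytes; the card values and the reporting callback are illustrative only.

```cpp
#include <cstdio>
#include <vector>

constexpr signed char kClean = -1;   // illustrative "clean" card value

// Walk cards right-to-left, accumulating contiguous dirty runs and
// reporting each run [lo, hi) once it is closed off by a clean card.
static void walk_dirty_windows(const std::vector<signed char>& cards) {
  int end_of_non_clean   = static_cast<int>(cards.size());
  int start_of_non_clean = end_of_non_clean;

  for (int cur = static_cast<int>(cards.size()) - 1; cur >= 0; cur--) {
    if (cards[cur] != kClean) {
      // Extend the dirty window one card to the left.
      start_of_non_clean = cur;
    } else {
      // Hit a clean card: flush any accumulated dirty window...
      if (start_of_non_clean < end_of_non_clean) {
        std::printf("dirty run: [%d, %d)\n",
                    start_of_non_clean, end_of_non_clean);
      }
      // ...and reset the window just left of the clean card.
      end_of_non_clean   = cur;
      start_of_non_clean = cur;
    }
  }
  // If the leftmost card was dirty, one window is still open.
  if (start_of_non_clean < end_of_non_clean) {
    std::printf("dirty run: [%d, %d)\n", start_of_non_clean, end_of_non_clean);
  }
}

int main() {
  // Cards 0, 1, 3 and 6 are dirty; cards 2, 4 and 5 are clean.
  walk_dirty_windows({0, 0, kClean, 0, kClean, kClean, 0});
  return 0;   // prints [6,7), [3,4), [0,2)
}
```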
// clean (by dirty->clean before) ==> cur_younger_gen
// dirty ==> cur_youngergen_and_prev_nonclean_card
// precleaned ==> cur_youngergen_and_prev_nonclean_card
......@@ -246,8 +250,8 @@ void CardTableRS::younger_refs_in_space_iterate(Space* sp,
cl->gen_boundary());
ClearNoncleanCardWrapper clear_cl(dcto_cl, this);
_ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(),
dcto_cl, &clear_cl);
_ct_bs->non_clean_card_iterate_possibly_parallel(sp, sp->used_region_at_save_marks(),
dcto_cl, &clear_cl);
}
void CardTableRS::clear_into_younger(Generation* gen, bool clear_perm) {
......
/*
* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -166,4 +166,21 @@ public:
};
class ClearNoncleanCardWrapper: public MemRegionClosure {
MemRegionClosure* _dirty_card_closure;
CardTableRS* _ct;
bool _is_par;
private:
// Clears the given card, return true if the corresponding card should be
// processed.
inline bool clear_card(jbyte* entry);
// Work methods called by the clear_card()
inline bool clear_card_serial(jbyte* entry);
inline bool clear_card_parallel(jbyte* entry);
public:
ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure, CardTableRS* ct);
void do_MemRegion(MemRegion mr);
};
#endif // SHARE_VM_MEMORY_CARDTABLERS_HPP
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -34,10 +34,10 @@ G1MemoryPoolSuper::G1MemoryPoolSuper(G1CollectedHeap* g1h,
size_t init_size,
bool support_usage_threshold) :
_g1h(g1h), CollectedMemoryPool(name,
MemoryPool::Heap,
init_size,
undefined_max(),
support_usage_threshold) {
MemoryPool::Heap,
init_size,
undefined_max(),
support_usage_threshold) {
assert(UseG1GC, "sanity");
}
......@@ -48,44 +48,27 @@ size_t G1MemoryPoolSuper::eden_space_committed(G1CollectedHeap* g1h) {
// See the comment at the top of g1MemoryPool.hpp
size_t G1MemoryPoolSuper::eden_space_used(G1CollectedHeap* g1h) {
size_t young_list_length = g1h->young_list()->length();
size_t eden_used = young_list_length * HeapRegion::GrainBytes;
size_t survivor_used = survivor_space_used(g1h);
eden_used = subtract_up_to_zero(eden_used, survivor_used);
return eden_used;
return g1h->g1mm()->eden_space_used();
}
// See the comment at the top of g1MemoryPool.hpp
size_t G1MemoryPoolSuper::survivor_space_committed(G1CollectedHeap* g1h) {
return MAX2(survivor_space_used(g1h), (size_t) HeapRegion::GrainBytes);
return g1h->g1mm()->survivor_space_committed();
}
// See the comment at the top of g1MemoryPool.hpp
size_t G1MemoryPoolSuper::survivor_space_used(G1CollectedHeap* g1h) {
size_t survivor_num = g1h->g1_policy()->recorded_survivor_regions();
size_t survivor_used = survivor_num * HeapRegion::GrainBytes;
return survivor_used;
return g1h->g1mm()->survivor_space_used();
}
// See the comment at the top of g1MemoryPool.hpp
size_t G1MemoryPoolSuper::old_space_committed(G1CollectedHeap* g1h) {
size_t committed = overall_committed(g1h);
size_t eden_committed = eden_space_committed(g1h);
size_t survivor_committed = survivor_space_committed(g1h);
committed = subtract_up_to_zero(committed, eden_committed);
committed = subtract_up_to_zero(committed, survivor_committed);
committed = MAX2(committed, (size_t) HeapRegion::GrainBytes);
return committed;
return g1h->g1mm()->old_space_committed();
}
// See the comment at the top of g1MemoryPool.hpp
size_t G1MemoryPoolSuper::old_space_used(G1CollectedHeap* g1h) {
size_t used = overall_used(g1h);
size_t eden_used = eden_space_used(g1h);
size_t survivor_used = survivor_space_used(g1h);
used = subtract_up_to_zero(used, eden_used);
used = subtract_up_to_zero(used, survivor_used);
return used;
return g1h->g1mm()->old_space_used();
}
G1EdenPool::G1EdenPool(G1CollectedHeap* g1h) :
......
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -46,68 +46,9 @@ class G1CollectedHeap;
// get, as this does affect the performance and behavior of G1. Which
// is why we introduce the three memory pools implemented here.
//
// The above approach introduces a couple of challenging issues in the
// implementation of the three memory pools:
// See comments in g1MonitoringSupport.hpp for additional details
// on this model.
//
// 1) The used space calculation for a pool is not necessarily
// independent of the others. We can easily get from G1 the overall
// used space in the entire heap, the number of regions in the young
// generation (includes both eden and survivors), and the number of
// survivor regions. So, from that we calculate:
//
// survivor_used = survivor_num * region_size
// eden_used = young_region_num * region_size - survivor_used
// old_gen_used = overall_used - eden_used - survivor_used
//
// Note that survivor_used and eden_used are upper bounds. To get the
// actual value we would have to iterate over the regions and add up
// ->used(). But that'd be expensive. So, we'll accept some lack of
// accuracy for those two. But, we have to be careful when calculating
// old_gen_used, in case we subtract from overall_used more than the
// actual number and our result goes negative.
//
// 2) Calculating the used space is straightforward, as described
// above. However, how do we calculate the committed space, given that
// we allocate space for the eden, survivor, and old gen out of the
// same pool of regions? One way to do this is to use the used value
// as also the committed value for the eden and survivor spaces and
// then calculate the old gen committed space as follows:
//
// old_gen_committed = overall_committed - eden_committed - survivor_committed
//
// Maybe a better way to do that would be to calculate used for eden
// and survivor as a sum of ->used() over their regions and then
// calculate committed as region_num * region_size (i.e., what we use
// to calculate the used space now). This is something to consider
// in the future.
//
// 3) Another decision that is again not straightforward is what is
// the max size that each memory pool can grow to. One way to do this
// would be to use the committed size for the max for the eden and
// survivors and calculate the old gen max as follows (basically, it's
// a similar pattern to what we use for the committed space, as
// described above):
//
// old_gen_max = overall_max - eden_max - survivor_max
//
// Unfortunately, the above makes the max of each pool fluctuate over
// time and, even though this is allowed according to the spec, it
// broke several assumptions in the M&M framework (there were cases
// where used would reach a value greater than max). So, for max we
// use -1, which means "undefined" according to the spec.
//
// 4) Now, there is a very subtle issue with all the above. The
// framework will call get_memory_usage() on the three pools
// asynchronously. As a result, each call might get a different value
// for, say, survivor_num which will yield inconsistent values for
// eden_used, survivor_used, and old_gen_used (as survivor_num is used
// in the calculation of all three). This would normally be
// ok. However, it's possible that this might cause the sum of
// eden_used, survivor_used, and old_gen_used to go over the max heap
// size and this seems to sometimes cause JConsole (and maybe other
// clients) to get confused. There isn't really an easy / clean
// solution to this problem, due to the asynchronous nature of the
// framework.
// This class is shared by the three G1 memory pool classes
......@@ -116,22 +57,6 @@ class G1CollectedHeap;
// (see comment above), we put the calculations in this class so that
// we can easily share them among the subclasses.
class G1MemoryPoolSuper : public CollectedMemoryPool {
private:
// It returns x - y if x > y, 0 otherwise.
// As described in the comment above, some of the inputs to the
// calculations we have to do are obtained concurrently and hence
// may be inconsistent with each other. So, this provides a
// defensive way of performing the subtraction and avoids the value
// going negative (which would mean a very large result, given that
// the parameters are size_t).
static size_t subtract_up_to_zero(size_t x, size_t y) {
if (x > y) {
return x - y;
} else {
return 0;
}
}
protected:
G1CollectedHeap* _g1h;
......@@ -148,13 +73,6 @@ protected:
return (size_t) -1;
}
static size_t overall_committed(G1CollectedHeap* g1h) {
return g1h->capacity();
}
static size_t overall_used(G1CollectedHeap* g1h) {
return g1h->used_unlocked();
}
static size_t eden_space_committed(G1CollectedHeap* g1h);
static size_t eden_space_used(G1CollectedHeap* g1h);
......