Commit fd7608b0 authored by tonyp

Merge

@@ -38,5 +38,6 @@
 // platforms, but they may have different default values on other platforms.
 //
 define_pd_global(bool, UseLargePages, false);
+define_pd_global(bool, UseLargePagesIndividualAllocation, false);
 define_pd_global(bool, UseOSErrorReporting, false);
 define_pd_global(bool, UseThreadPriorities, true) ;
@@ -44,5 +44,6 @@
 // platforms, but they may have different default values on other platforms.
 //
 define_pd_global(bool, UseLargePages, true);
+define_pd_global(bool, UseLargePagesIndividualAllocation, false);
 define_pd_global(bool, UseOSErrorReporting, false);
 define_pd_global(bool, UseThreadPriorities, false);
@@ -37,5 +37,6 @@
 // platforms, but they may have different default values on other platforms.
 //
 define_pd_global(bool, UseLargePages, false);
+define_pd_global(bool, UseLargePagesIndividualAllocation, true);
 define_pd_global(bool, UseOSErrorReporting, false); // for now.
 define_pd_global(bool, UseThreadPriorities, true) ;
@@ -2593,9 +2593,104 @@ bool os::can_execute_large_page_memory() {
 }

 char* os::reserve_memory_special(size_t bytes) {
+  if (UseLargePagesIndividualAllocation) {
+    if (TracePageSizes && Verbose) {
+      tty->print_cr("Reserving large pages individually.");
+    }
+    char * p_buf;
+    // First reserve enough address space in advance, since we want to be
+    // able to break a single contiguous virtual address range into multiple
+    // large page commits, but WS2003 does not allow reserving large page
+    // space. So we just use 4K pages for the reserve; this gives us a legal
+    // contiguous address space. Then we will release that reservation and
+    // re-allocate using large pages.
+    const size_t size_of_reserve = bytes + _large_page_size;
+    if (bytes > size_of_reserve) {
+      // Overflowed.
+      warning("Individually allocated large pages failed, "
+              "use -XX:-UseLargePagesIndividualAllocation to turn off");
+      return NULL;
+    }
+    p_buf = (char *) VirtualAlloc(NULL,
+                                  size_of_reserve,  // size of Reserve
+                                  MEM_RESERVE,
+                                  PAGE_EXECUTE_READWRITE);
+    // If reservation failed, return NULL
+    if (p_buf == NULL) return NULL;
+
+    release_memory(p_buf, bytes + _large_page_size);
+    // Round up to page boundary. If size_of_reserve did not overflow and
+    // the reservation did not fail, this align-up should not overflow.
+    p_buf = (char *) align_size_up((size_t)p_buf, _large_page_size);
+
+    // Now go through and allocate one large page at a time until all bytes
+    // are allocated.
+    size_t bytes_remaining = align_size_up(bytes, _large_page_size);
+    // An overflow of align_size_up() would have been caught above
+    // in the calculation of size_of_reserve.
+    char * next_alloc_addr = p_buf;
+
+#ifdef ASSERT
+    // Variables for the random failure injection
+    long ran_num = os::random();
+    size_t fail_after = ran_num % bytes;
+#endif
+
+    while (bytes_remaining) {
+      size_t bytes_to_rq = MIN2(bytes_remaining, _large_page_size);
+      // Note: allocate and commit in one step.
+      char * p_new;
+
+#ifdef ASSERT
+      bool inject_error = LargePagesIndividualAllocationInjectError &&
+                          (bytes_remaining <= fail_after);
+#else
+      const bool inject_error = false;
+#endif
+
+      if (inject_error) {
+        p_new = NULL;
+      } else {
+        p_new = (char *) VirtualAlloc(next_alloc_addr,
+                                      bytes_to_rq,
+                                      MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
+                                      PAGE_EXECUTE_READWRITE);
+      }
+
+      if (p_new == NULL) {
+        // Free any allocated pages
+        if (next_alloc_addr > p_buf) {
+          // Some memory was committed so release it.
+          size_t bytes_to_release = bytes - bytes_remaining;
+          release_memory(p_buf, bytes_to_release);
+        }
+#ifdef ASSERT
+        if (UseLargePagesIndividualAllocation &&
+            LargePagesIndividualAllocationInjectError) {
+          if (TracePageSizes && Verbose) {
+            tty->print_cr("Reserving large pages individually failed.");
+          }
+        }
+#endif
+        return NULL;
+      }
+      bytes_remaining -= bytes_to_rq;
+      next_alloc_addr += bytes_to_rq;
+    }
+
+    return p_buf;
+  } else {
+    // Normal policy: just allocate it all at once.
     DWORD flag = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
-  char * res = (char *)VirtualAlloc(NULL, bytes, flag, PAGE_EXECUTE_READWRITE);
+    char * res = (char *)VirtualAlloc(NULL,
+                                      bytes,
+                                      flag,
+                                      PAGE_EXECUTE_READWRITE);
     return res;
+  }
 }

 bool os::release_memory_special(char* base, size_t bytes) {
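For readers who want to experiment with this allocation scheme outside HotSpot, here is a minimal sketch of the same reserve-then-recommit trick against the raw Win32 API. The helper name and the simplified error path are mine; the real code above routes releases through os::release_memory and adds the ASSERT-only failure injection. Getting a non-NULL result also requires the caller to hold the "Lock pages in memory" privilege (SeLockMemoryPrivilege).

```cpp
#include <windows.h>
#include <stdint.h>
#include <stdio.h>

// Sketch only: reserve a contiguous range with normal pages to pick an
// address, release it, then re-commit the range one large page at a time.
static char* reserve_large_pages_individually(size_t bytes, size_t lps) {
  size_t size_of_reserve = bytes + lps;
  if (bytes > size_of_reserve) return NULL;               // size_t overflow
  char* p_buf = (char*)VirtualAlloc(NULL, size_of_reserve,
                                    MEM_RESERVE, PAGE_READWRITE);
  if (p_buf == NULL) return NULL;
  VirtualFree(p_buf, 0, MEM_RELEASE);        // keep only the address range
  p_buf = (char*)(((uintptr_t)p_buf + lps - 1) & ~(uintptr_t)(lps - 1));
  size_t remaining = (bytes + lps - 1) & ~(lps - 1);      // round up to lps
  char* next = p_buf;
  while (remaining > 0) {                    // remaining is a multiple of lps
    if (VirtualAlloc(next, lps, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
                     PAGE_READWRITE) == NULL) {
      // Another thread may have raced into the gap; each committed chunk is
      // its own allocation, so release the chunks individually.
      for (char* q = p_buf; q < next; q += lps) VirtualFree(q, 0, MEM_RELEASE);
      return NULL;
    }
    remaining -= lps;
    next += lps;
  }
  return p_buf;
}

int main() {
  size_t lps = (size_t)GetLargePageMinimum();             // 0 if unsupported
  if (lps == 0) { printf("large pages unsupported\n"); return 1; }
  char* p = reserve_large_pages_individually(4 * lps, lps);
  printf("result: %p\n", (void*)p);   // NULL without SeLockMemoryPrivilege
  return 0;
}
```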
@@ -2983,6 +3078,7 @@ size_t os::win32::_default_stack_size = 0;
 volatile intx os::win32::_os_thread_count = 0;
 bool os::win32::_is_nt = false;
+bool os::win32::_is_windows_2003 = false;

 void os::win32::initialize_system_info() {
@@ -3005,7 +3101,15 @@ void os::win32::initialize_system_info() {
   GetVersionEx(&oi);
   switch(oi.dwPlatformId) {
   case VER_PLATFORM_WIN32_WINDOWS: _is_nt = false; break;
-  case VER_PLATFORM_WIN32_NT: _is_nt = true; break;
+  case VER_PLATFORM_WIN32_NT:
+    _is_nt = true;
+    {
+      int os_vers = oi.dwMajorVersion * 1000 + oi.dwMinorVersion;
+      if (os_vers == 5002) {
+        _is_windows_2003 = true;
+      }
+    }
+    break;
   default: fatal("Unknown platform");
   }
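The 5002 constant packs the NT version as dwMajorVersion * 1000 + dwMinorVersion, and the 5.2 kernel is what Windows Server 2003 reports. A standalone probe using the same (since-deprecated) GetVersionEx call, for anyone who wants to check the encoding:

```cpp
#include <windows.h>
#include <stdio.h>

int main() {
  OSVERSIONINFO oi;
  ZeroMemory(&oi, sizeof(oi));
  oi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);  // required before the call
  GetVersionEx(&oi);
  int os_vers = oi.dwMajorVersion * 1000 + oi.dwMinorVersion;
  // 5002 == NT 5.2, i.e. Windows Server 2003 (and XP x64, which shares it).
  printf("platform=%lu encoded version=%d\n",
         (unsigned long)oi.dwPlatformId, os_vers);
  return 0;
}
```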
@@ -3103,6 +3207,10 @@ void os::init(void) {
     NoYieldsInMicrolock = true;
   }
 #endif
+  // This may be overridden later when argument processing is done.
+  FLAG_SET_ERGO(bool, UseLargePagesIndividualAllocation,
+                os::win32::is_windows_2003());
+
   // Initialize main_process and main_thread
   main_process = GetCurrentProcess();  // Remember main_process is a pseudo handle
   if (!DuplicateHandle(main_process, GetCurrentThread(), main_process,
......
@@ -34,6 +34,7 @@ class win32 {
   static julong _physical_memory;
   static size_t _default_stack_size;
   static bool _is_nt;
+  static bool _is_windows_2003;

 public:
   // Windows-specific interface:
@@ -60,6 +61,9 @@ class win32 {
   // Tells whether the platform is NT or Windows 95
   static bool is_nt() { return _is_nt; }

+  // Tells whether the platform is Windows 2003
+  static bool is_windows_2003() { return _is_windows_2003; }
+
   // Returns the byte size of a virtual memory page
   static int vm_page_size() { return _vm_page_size; }
......
@@ -1789,6 +1789,20 @@ G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl,
   }
 }

+class ResetClaimValuesClosure: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion* r) {
+    r->set_claim_value(HeapRegion::InitialClaimValue);
+    return false;
+  }
+};
+
+void
+G1CollectedHeap::reset_heap_region_claim_values() {
+  ResetClaimValuesClosure blk;
+  heap_region_iterate(&blk);
+}
+
 #ifdef ASSERT
 // This checks whether all regions in the heap have the correct claim
 // value. I also piggy-backed on this a check to ensure that the
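ResetClaimValuesClosure follows the HotSpot closure convention: heap_region_iterate() applies doHeapRegion() to each region in turn and aborts the walk as soon as a closure returns true, which is why a visit-everything closure like this one always returns false. A self-contained toy model of that convention (the types here are stand-ins, not HotSpot's):

```cpp
#include <cstddef>
#include <cstdio>

struct Region { int claim_value; };

struct RegionClosure {
  virtual ~RegionClosure() {}
  // Return true to abort the iteration, false to keep going.
  virtual bool do_region(Region* r) = 0;
};

static void region_iterate(Region* regions, size_t n, RegionClosure* cl) {
  for (size_t i = 0; i < n; i++)
    if (cl->do_region(&regions[i])) return;   // closure asked to stop early
}

struct ResetClaims : RegionClosure {
  bool do_region(Region* r) {
    r->claim_value = 0;   // back to InitialClaimValue
    return false;         // never abort: every region must be visited
  }
};

int main() {
  Region heap[4] = {{4}, {4}, {4}, {4}};  // e.g. left at ParVerifyClaimValue
  ResetClaims blk;
  region_iterate(heap, 4, &blk);
  printf("claim_value[0] = %d\n", heap[0].claim_value);  // prints 0
  return 0;
}
```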
@@ -2031,10 +2045,12 @@ public:
 class VerifyRegionClosure: public HeapRegionClosure {
 public:
   bool _allow_dirty;
-  VerifyRegionClosure(bool allow_dirty)
-    : _allow_dirty(allow_dirty) {}
+  bool _par;
+  VerifyRegionClosure(bool allow_dirty, bool par = false)
+    : _allow_dirty(allow_dirty), _par(par) {}
   bool doHeapRegion(HeapRegion* r) {
-    guarantee(r->claim_value() == 0, "Should be unclaimed at verify points.");
+    guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue,
+              "Should be unclaimed at verify points.");
     if (r->isHumongous()) {
       if (r->startsHumongous()) {
         // Verify the single H object.
@@ -2082,6 +2098,25 @@ public:
   }
 };

+// This is the task used for parallel heap verification.
+class G1ParVerifyTask: public AbstractGangTask {
+private:
+  G1CollectedHeap* _g1h;
+  bool _allow_dirty;
+
+public:
+  G1ParVerifyTask(G1CollectedHeap* g1h, bool allow_dirty) :
+    AbstractGangTask("Parallel verify task"),
+    _g1h(g1h), _allow_dirty(allow_dirty) { }
+
+  void work(int worker_i) {
+    VerifyRegionClosure blk(_allow_dirty, true);
+    _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
+                                          HeapRegion::ParVerifyClaimValue);
+  }
+};
+
 void G1CollectedHeap::verify(bool allow_dirty, bool silent) {
   if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
     if (!silent) { gclog_or_tty->print("roots "); }
@@ -2092,8 +2127,27 @@ void G1CollectedHeap::verify(bool allow_dirty, bool silent) {
                                 &rootsCl);
     rem_set()->invalidate(perm_gen()->used_region(), false);
     if (!silent) { gclog_or_tty->print("heapRegions "); }
+    if (GCParallelVerificationEnabled && ParallelGCThreads > 1) {
+      assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+             "sanity check");
+
+      G1ParVerifyTask task(this, allow_dirty);
+      int n_workers = workers()->total_workers();
+      set_par_threads(n_workers);
+      workers()->run_task(&task);
+      set_par_threads(0);
+
+      assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue),
+             "sanity check");
+
+      reset_heap_region_claim_values();
+
+      assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+             "sanity check");
+    } else {
       VerifyRegionClosure blk(allow_dirty);
       _hrs->iterate(&blk);
+    }
     if (!silent) gclog_or_tty->print("remset ");
     rem_set()->verify();
     guarantee(!rootsCl.failures(), "should not have had failures");
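The asserts around the parallel branch encode the claim-value protocol: every worker scans the full region set, but a region is verified only by the worker that first swings its claim value from InitialClaimValue to ParVerifyClaimValue, and all values must be reset afterwards so the next parallel phase finds InitialClaimValue again. A toy model of the protocol (std::atomic and the constants stand in for HotSpot's own atomics and chunked iteration):

```cpp
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

struct Region { std::atomic<int> claim{0}; };   // 0 == InitialClaimValue

// Every worker walks all regions; a CAS decides who owns each one.
static void par_verify(std::vector<Region>& heap, int claim_value,
                       std::atomic<int>& verified) {
  for (auto& r : heap) {
    int expected = 0;                           // InitialClaimValue
    if (r.claim.compare_exchange_strong(expected, claim_value))
      verified.fetch_add(1);                    // we won this region
  }
}

int main() {
  const int kParVerifyClaimValue = 4;
  std::vector<Region> heap(1000);
  std::atomic<int> verified{0};
  std::vector<std::thread> workers;
  for (int i = 0; i < 4; i++)
    workers.emplace_back(par_verify, std::ref(heap),
                         kParVerifyClaimValue, std::ref(verified));
  for (auto& t : workers) t.join();
  printf("verified %d of %zu regions, each exactly once\n",
         verified.load(), heap.size());         // always 1000
  for (auto& r : heap) r.claim.store(0);  // reset_heap_region_claim_values()
  return 0;
}
```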
......
@@ -890,6 +890,9 @@ public:
                                           int worker,
                                           jint claim_value);

+  // It resets all the region claim values to the default.
+  void reset_heap_region_claim_values();
+
 #ifdef ASSERT
   bool check_heap_region_claim_values(jint claim_value);
 #endif // ASSERT
......
@@ -317,7 +317,8 @@ class HeapRegion: public G1OffsetTableContigSpace {
     InitialClaimValue = 0,
     FinalCountClaimValue = 1,
     NoteEndClaimValue = 2,
-    ScrubRemSetClaimValue = 3
+    ScrubRemSetClaimValue = 3,
+    ParVerifyClaimValue = 4
   };

 // Concurrent refinement requires contiguous heap regions (in which TLABs
......
@@ -391,6 +391,8 @@ size_t MutableNUMASpace::default_chunk_size() {
 }

 // Produce a new chunk size. page_size() aligned.
+// This function is expected to be called on a sequence of i's from 0 to
+// lgrp_spaces()->length().
 size_t MutableNUMASpace::adaptive_chunk_size(int i, size_t limit) {
   size_t pages_available = base_space_size();
   for (int j = 0; j < i; j++) {
@@ -405,7 +407,7 @@ size_t MutableNUMASpace::adaptive_chunk_size(int i, size_t limit) {
   size_t chunk_size = 0;
   if (alloc_rate > 0) {
     LGRPSpace *ls = lgrp_spaces()->at(i);
-    chunk_size = (size_t)(ls->alloc_rate()->average() * pages_available / alloc_rate) * page_size();
+    chunk_size = (size_t)(ls->alloc_rate()->average() / alloc_rate * pages_available) * page_size();
   }
   chunk_size = MAX2(chunk_size, page_size());
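The patched line gives each lgroup a chunk proportional to its share of the total allocation rate, floored at one page by the MAX2 below it. A toy model of that sizing rule, with invented rates and page counts:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// Toy model of MutableNUMASpace::adaptive_chunk_size(): each lgroup space
// gets a share of the available pages proportional to its measured
// allocation rate, never less than one page.
int main() {
  const size_t page_size = 4096;
  std::vector<float> rates = {300.0f, 100.0f, 0.0f, 600.0f};  // per-lgroup
  float total_rate = 0;
  for (float r : rates) total_rate += r;

  size_t pages_available = 1000;
  for (size_t i = 0; i < rates.size(); i++) {
    size_t chunk_size = 0;
    if (total_rate > 0) {
      // Same evaluation order as the patched line: divide the rates first,
      // then scale the resulting fraction by the page budget.
      chunk_size = (size_t)(rates[i] / total_rate * pages_available) * page_size;
    }
    chunk_size = std::max(chunk_size, page_size);  // MAX2(chunk_size, page_size)
    printf("lgrp %zu: chunk = %zu bytes (%zu pages)\n",
           i, chunk_size, chunk_size / page_size);
  }
  return 0;
}
```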
......
@@ -330,6 +330,12 @@ class CommandLineFlags {
   product_pd(bool, UseLargePages,                                         \
           "Use large page memory")                                        \
                                                                           \
+  product_pd(bool, UseLargePagesIndividualAllocation,                     \
+          "Allocate large pages individually for better affinity")        \
+                                                                          \
+  develop(bool, LargePagesIndividualAllocationInjectError, false,         \
+          "Fail large pages individual allocation")                       \
+                                                                          \
   develop(bool, TracePageSizes, false,                                    \
           "Trace page size selection and usage.")                         \
                                                                           \
@@ -1819,6 +1825,9 @@ class CommandLineFlags {
   diagnostic(bool, VerifyDuringGC, false,                                 \
           "Verify memory system during GC (between phases)")              \
                                                                           \
+  diagnostic(bool, GCParallelVerificationEnabled, true,                   \
+          "Enable parallel memory system verification")                   \
+                                                                          \
   diagnostic(bool, VerifyRememberedSets, false,                           \
           "Verify GC remembered sets")                                    \
                                                                           \
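In HotSpot's flag taxonomy these macro choices matter at runtime: the product_pd flag can be set on any build, e.g. -XX:+UseLargePagesIndividualAllocation (or -XX:-UseLargePagesIndividualAllocation to disable, as the warning text in os_windows.cpp suggests); the diagnostic flag is only settable after -XX:+UnlockDiagnosticVMOptions; and the develop error-injection flag exists only in debug builds, where os::reserve_memory_special consults it to simulate mid-allocation failures.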
......