From 1b6f63aeff532deea8c9e198b5f25b24f989b641 Mon Sep 17 00:00:00 2001
From: stefank <unknown>
Date: Fri, 31 May 2013 14:32:44 +0200
Subject: [PATCH] 8022880: False sharing between PSPromotionManager instances
 Summary: Pad the PSPromotionManager instances in the manager array.
 Reviewed-by: brutisso, jmasa

---
 .../concurrentMarkSweepGeneration.cpp         |  1 +
 .../parNew/parNewGeneration.hpp               |  3 +-
 .../parNew/parOopClosures.hpp                 |  3 +-
 .../parallelScavenge/psPromotionManager.cpp   | 25 +++--
 .../parallelScavenge/psPromotionManager.hpp   | 12 ++-
 .../psPromotionManager.inline.hpp             |  2 +-
 src/share/vm/memory/padded.hpp                | 93 +++++++++++++++++++
 src/share/vm/memory/padded.inline.hpp         | 49 ++++++++++
 src/share/vm/utilities/debug.hpp              | 16 ++++
 src/share/vm/utilities/globalDefinitions.hpp  | 32 ++-----
 10 files changed, 189 insertions(+), 47 deletions(-)
 create mode 100644 src/share/vm/memory/padded.hpp
 create mode 100644 src/share/vm/memory/padded.inline.hpp
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
index c04a52613..54ade2db1 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@@ -50,6 +50,7 @@
 #include "memory/genMarkSweep.hpp"
 #include "memory/genOopClosures.inline.hpp"
 #include "memory/iterator.hpp"
+#include "memory/padded.hpp"
 #include "memory/referencePolicy.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/tenuredGeneration.hpp"
diff --git a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
index 987767b16..6d3b25d95 100644
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 #include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "gc_implementation/shared/copyFailedInfo.hpp"
 #include "memory/defNewGeneration.hpp"
+#include "memory/padded.hpp"
 #include "utilities/taskqueue.hpp"
 
 class ChunkArray;
diff --git a/src/share/vm/gc_implementation/parNew/parOopClosures.hpp b/src/share/vm/gc_implementation/parNew/parOopClosures.hpp
index 0d9d7761c..00b865f2a 100644
--- a/src/share/vm/gc_implementation/parNew/parOopClosures.hpp
+++ b/src/share/vm/gc_implementation/parNew/parOopClosures.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PAROOPCLOSURES_HPP
 
 #include "memory/genOopClosures.hpp"
+#include "memory/padded.hpp"
 
 // Closures for ParNewGeneration
 
diff --git a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp
index 32929e7a5..dd3933b00 100644
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp
@@ -29,14 +29,16 @@
 #include "gc_implementation/parallelScavenge/psScavenge.inline.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/mutableSpace.hpp"
+#include "memory/allocation.inline.hpp"
 #include "memory/memRegion.hpp"
+#include "memory/padded.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "oops/oop.psgc.inline.hpp"
 
-PSPromotionManager**         PSPromotionManager::_manager_array = NULL;
-OopStarTaskQueueSet*         PSPromotionManager::_stack_array_depth = NULL;
-PSOldGen*                    PSPromotionManager::_old_gen = NULL;
-MutableSpace*                PSPromotionManager::_young_space = NULL;
+PaddedEnd<PSPromotionManager>* PSPromotionManager::_manager_array = NULL;
+OopStarTaskQueueSet*           PSPromotionManager::_stack_array_depth = NULL;
+PSOldGen*                      PSPromotionManager::_old_gen = NULL;
+MutableSpace*                  PSPromotionManager::_young_space = NULL;
 
 void PSPromotionManager::initialize() {
   ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
@@ -45,8 +47,10 @@ void PSPromotionManager::initialize() {
   _old_gen = heap->old_gen();
   _young_space = heap->young_gen()->to_space();
 
+  // To prevent false sharing, we pad the PSPromotionManagers
+  // and make sure that the first instance starts at a cache line boundary.
   assert(_manager_array == NULL, "Attempt to initialize twice");
-  _manager_array = NEW_C_HEAP_ARRAY(PSPromotionManager*, ParallelGCThreads+1, mtGC);
+  _manager_array = PaddedArray<PSPromotionManager, mtGC>::create_unfreeable(ParallelGCThreads + 1);
   guarantee(_manager_array != NULL, "Could not initialize promotion manager");
 
   _stack_array_depth = new OopStarTaskQueueSet(ParallelGCThreads);
@@ -54,26 +58,21 @@ void PSPromotionManager::initialize() {
 
   // Create and register the PSPromotionManager(s) for the worker threads.
   for(uint i=0; i<ParallelGCThreads; i++) {
-    _manager_array[i] = new PSPromotionManager();
-    guarantee(_manager_array[i] != NULL, "Could not create PSPromotionManager");
-    stack_array_depth()->register_queue(i, _manager_array[i]->claimed_stack_depth());
+    stack_array_depth()->register_queue(i, _manager_array[i].claimed_stack_depth());
   }
-
   // The VMThread gets its own PSPromotionManager, which is not available
   // for work stealing.
-  _manager_array[ParallelGCThreads] = new PSPromotionManager();
-  guarantee(_manager_array[ParallelGCThreads] != NULL, "Could not create PSPromotionManager");
 }
 
 PSPromotionManager* PSPromotionManager::gc_thread_promotion_manager(int index) {
   assert(index >= 0 && index < (int)ParallelGCThreads, "index out of range");
   assert(_manager_array != NULL, "Sanity");
-  return _manager_array[index];
+  return &_manager_array[index];
 }
 
 PSPromotionManager* PSPromotionManager::vm_thread_promotion_manager() {
   assert(_manager_array != NULL, "Sanity");
-  return _manager_array[ParallelGCThreads];
+  return &_manager_array[ParallelGCThreads];
 }
 
 void PSPromotionManager::pre_scavenge() {
diff --git a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp
index 8f4731428..6707ade2d 100644
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp
@@ -29,6 +29,8 @@
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/copyFailedInfo.hpp"
 #include "memory/allocation.hpp"
+#include "memory/padded.hpp"
+#include "utilities/globalDefinitions.hpp"
 #include "utilities/taskqueue.hpp"
 
 //
@@ -51,14 +53,14 @@ class MutableSpace;
 class PSOldGen;
 class ParCompactionManager;
 
-class PSPromotionManager : public CHeapObj<mtGC> {
+class PSPromotionManager VALUE_OBJ_CLASS_SPEC {
   friend class PSScavenge;
   friend class PSRefProcTaskExecutor;
  private:
-  static PSPromotionManager**         _manager_array;
-  static OopStarTaskQueueSet*         _stack_array_depth;
-  static PSOldGen*                    _old_gen;
-  static MutableSpace*                _young_space;
+  static PaddedEnd<PSPromotionManager>* _manager_array;
+  static OopStarTaskQueueSet*           _stack_array_depth;
+  static PSOldGen*                      _old_gen;
+  static MutableSpace*                  _young_space;
 
 #if TASKQUEUE_STATS
   size_t                              _masked_pushes;
diff --git a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
index 841ef64f2..34c935408 100644
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
@@ -32,7 +32,7 @@
 inline PSPromotionManager* PSPromotionManager::manager_array(int index) {
   assert(_manager_array != NULL, "access of NULL manager_array");
   assert(index >= 0 && index <= (int)ParallelGCThreads, "out of range manager_array access");
-  return _manager_array[index];
+  return &_manager_array[index];
 }
 
 template <class T>
diff --git a/src/share/vm/memory/padded.hpp b/src/share/vm/memory/padded.hpp
new file mode 100644
index 000000000..4c50b3996
--- /dev/null
+++ b/src/share/vm/memory/padded.hpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_PADDED_HPP
+#define SHARE_VM_MEMORY_PADDED_HPP
+
+#include "memory/allocation.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+// Bytes needed to pad type to avoid cache-line sharing; alignment should be the
+// expected cache line size (a power of two).  The first addend avoids sharing
+// when the start address is not a multiple of alignment; the second maintains
+// alignment of starting addresses that happen to be a multiple.
+#define PADDING_SIZE(type, alignment)                           \
+  ((alignment) + align_size_up_(sizeof(type), alignment))
+
+// Templates to create a subclass padded to avoid cache line sharing.  These are
+// effective only when applied to most-derived (leaf) classes.
+
+// Pads T with PADDING_SIZE(T, alignment) bytes; for when no args are passed to the base ctor.
+template <class T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
+class Padded : public T {
+ private:
+  char _pad_buf_[PADDING_SIZE(T, alignment)];
+};
+
+// Same padding as Padded, for when either 0 or 1 args (of type Arg1T) may be passed to the base ctor.
+template <class T, typename Arg1T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
+class Padded01 : public T {
+ public:
+  Padded01(): T() { }
+  Padded01(Arg1T arg1): T(arg1) { }
+ private:
+  char _pad_buf_[PADDING_SIZE(T, alignment)];
+};
+
+// Super class of PaddedEnd when pad_size != 0: adds a pad_size byte buffer to T.
+template <class T, size_t pad_size>
+class PaddedEndImpl : public T {
+ private:
+  char _pad_buf[pad_size];
+};
+
+// Super class of PaddedEnd when pad_size == 0: specialization that adds no pad buffer.
+template <class T>
+class PaddedEndImpl<T, /*pad_size*/ 0> : public T {
+  // No padding.
+};
+
+#define PADDED_END_SIZE(type, alignment) (align_size_up_(sizeof(type), alignment) - sizeof(type))
+
+// More memory conservative implementation of Padded. The subclass adds the
+// minimal amount of padding needed to make the size of the objects be aligned.
+// This helps to reduce false sharing, provided that
+// the start address is a multiple of alignment.
+template <class T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
+class PaddedEnd : public PaddedEndImpl<T, PADDED_END_SIZE(T, alignment)> {
+  // C++ doesn't allow zero-length arrays. The padding is put in a
+  // super class that is specialized for the pad_size == 0 case.
+};
+
+// Helper class to create an array of PaddedEnd<T> objects. All elements will
+// start at a multiple of alignment and the size will be aligned to alignment.
+template <class T, MEMFLAGS flags, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
+class PaddedArray {
+ public:
+  // Creates an aligned padded array of 'length' default-constructed elements.
+  // The memory can't be freed since the raw memory chunk is not returned.
+  static PaddedEnd<T>* create_unfreeable(uint length);
+};
+
+#endif // SHARE_VM_MEMORY_PADDED_HPP
diff --git a/src/share/vm/memory/padded.inline.hpp b/src/share/vm/memory/padded.inline.hpp
new file mode 100644
index 000000000..1e9994ab6
--- /dev/null
+++ b/src/share/vm/memory/padded.inline.hpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "memory/allocation.inline.hpp"
+#include "memory/padded.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+// Creates an aligned padded array of 'length' default-constructed elements.
+// The memory can't be deleted since the raw memory chunk is not returned.
+template <class T, MEMFLAGS flags, size_t alignment>
+PaddedEnd<T>* PaddedArray<T, flags, alignment>::create_unfreeable(uint length) {
+  // The array is indexed as PaddedEnd<T>; that stride must be a multiple of alignment.
+  STATIC_ASSERT(is_size_aligned_(sizeof(PaddedEnd<T>), alignment));
+
+  // Size the chunk with that same stride, plus slack for aligning the start.
+  void* chunk = AllocateHeap(length * sizeof(PaddedEnd<T>) + alignment, flags);
+
+  // Make the initial alignment.
+  PaddedEnd<T>* aligned_padded_array = (PaddedEnd<T>*)align_pointer_up(chunk, alignment);
+
+  // Call the default constructor for each element.
+  for (uint i = 0; i < length; i++) {
+    ::new (&aligned_padded_array[i]) T();
+  }
+
+  return aligned_padded_array;
+}
diff --git a/src/share/vm/utilities/debug.hpp b/src/share/vm/utilities/debug.hpp
index 2450c8fe1..85b26f35f 100644
--- a/src/share/vm/utilities/debug.hpp
+++ b/src/share/vm/utilities/debug.hpp
@@ -225,6 +225,22 @@ void report_untested(const char* file, int line, const char* message);
 
 void warning(const char* format, ...);
 
+#ifdef ASSERT
+// Compile-time asserts; only checked when ASSERT is defined.
+template <bool> struct StaticAssert;
+template <> struct StaticAssert<true> {};
+
+// Only StaticAssert<true> is defined, so if cond evaluates to false we get
+// a compile-time error when trying to instantiate StaticAssert<false>.
+#define STATIC_ASSERT(cond)                   \
+  do {                                        \
+    StaticAssert<(cond)> DUMMY_STATIC_ASSERT; \
+    (void)DUMMY_STATIC_ASSERT; /* ignore */   \
+  } while (false)
+#else
+#define STATIC_ASSERT(cond)
+#endif
+
 // out of shared space reporting
 enum SharedSpaceType {
   SharedPermGen,
diff --git a/src/share/vm/utilities/globalDefinitions.hpp b/src/share/vm/utilities/globalDefinitions.hpp
index 181e80a08..f15bb5da9 100644
--- a/src/share/vm/utilities/globalDefinitions.hpp
+++ b/src/share/vm/utilities/globalDefinitions.hpp
@@ -410,6 +410,8 @@ inline intptr_t align_size_down(intptr_t size, intptr_t alignment) {
   return align_size_down_(size, alignment);
 }
 
+#define is_size_aligned_(size, alignment) ((size) == (align_size_up_(size, alignment)))
+
 // Align objects by rounding up their size, in HeapWord units.
 
 #define align_object_size_(size) align_size_up_(size, MinObjAlignment)
@@ -428,6 +430,10 @@ inline intptr_t align_object_offset(intptr_t offset) {
   return align_size_up(offset, HeapWordsPerLong);
 }
 
+inline void* align_pointer_up(const void* addr, size_t size) {
+  return (void*) align_size_up_((uintptr_t)addr, size);
+}
+
 // Clamp an address to be within a specific page
 // 1. If addr is on the page it is returned as is
 // 2. If addr is above the page_address the start of the *next* page will be returned
@@ -449,32 +455,6 @@ inline address clamp_address_in_page(address addr, address page_address, intptr_
 // The expected size in bytes of a cache line, used to pad data structures.
 #define DEFAULT_CACHE_LINE_SIZE 64
 
-// Bytes needed to pad type to avoid cache-line sharing; alignment should be the
-// expected cache line size (a power of two).  The first addend avoids sharing
-// when the start address is not a multiple of alignment; the second maintains
-// alignment of starting addresses that happen to be a multiple.
-#define PADDING_SIZE(type, alignment)                           \
-  ((alignment) + align_size_up_(sizeof(type), alignment))
-
-// Templates to create a subclass padded to avoid cache line sharing.  These are
-// effective only when applied to derived-most (leaf) classes.
-
-// When no args are passed to the base ctor.
-template <class T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
-class Padded: public T {
-private:
-  char _pad_buf_[PADDING_SIZE(T, alignment)];
-};
-
-// When either 0 or 1 args may be passed to the base ctor.
-template <class T, typename Arg1T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
-class Padded01: public T {
-public:
-  Padded01(): T() { }
-  Padded01(Arg1T arg1): T(arg1) { }
-private:
-  char _pad_buf_[PADDING_SIZE(T, alignment)];
-};
 
 //----------------------------------------------------------------------------------------------------
 // Utility macros for compilers
-- 
GitLab