6362677: Change parallel GC collector default number of parallel GC threads.

Summary: Use the same default number of GC threads as used by ParNewGC and ConcMarkSweepGC (i.e., the 5/8th rule). Reviewed-by: ysr, tonyp

6362677: Change parallel GC collector default number of parallel GC threads.
Summary: Use the same default number of GC threads as used by ParNewGC and ConcMarkSweepGC (i.e., the 5/8th rule). Reviewed-by: ysr, tonyp
1b52db35 · jmasa · 39cad98a · 1b52db35 · 1b52db35 · 1b52db35
8 changed file
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp
@@ -28,6 +28,12 @@
 int VM_Version::_features = VM_Version::unknown_m;
 const char* VM_Version::_features_str = "";

+bool VM_Version::is_niagara1_plus() {
+  // This is a placeholder until the real test is determined.
+  return is_niagara1() &&
+    (os::processor_count() > maximum_niagara1_processor_count());
+}
+
 void VM_Version::initialize() {
  _features = determine_features();
  PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
@@ -160,3 +166,13 @@ void VM_Version::allow_all() {
 void VM_Version::revert() {
  _features = saved_features;
 }
+
+unsigned int VM_Version::calc_parallel_worker_threads() {
+  unsigned int result;
+  if (is_niagara1_plus()) {
+    result = nof_parallel_worker_threads(5, 16, 8);
+  } else {
+    result = nof_parallel_worker_threads(5, 8, 8);
+  }
+  return result;
+}
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp
@@ -64,6 +64,11 @@ protected:

  static bool is_niagara1(int features) { return (features & niagara1_m) == niagara1_m; }

+  static int maximum_niagara1_processor_count() { return 32; }
+  // Returns true if the platform is in the niagara line and
+  // newer than the niagara1.
+  static bool is_niagara1_plus();
+
 public:
  // Initialization
  static void initialize();
@@ -129,4 +134,7 @@ public:

  // Override the Abstract_VM_Version implementation.
  static uint page_size_count() { return is_sun4v() ? 4 : 2; }
+
+  // Calculates the number of parallel threads
+  static unsigned int calc_parallel_worker_threads();
 };
--- a/src/share/vm/gc_implementation/parallelScavenge/generationSizer.hpp
+++ b/src/share/vm/gc_implementation/parallelScavenge/generationSizer.hpp
@@ -39,10 +39,10 @@ class GenerationSizer : public TwoGenerationCollectorPolicy {

    // If the user hasn't explicitly set the number of worker
    // threads, set the count.
-    if (ParallelGCThreads == 0) {
-      assert(UseParallelGC, "Setting ParallelGCThreads without UseParallelGC");
-      ParallelGCThreads = os::active_processor_count();
-    }
+    assert(UseSerialGC ||
+           !FLAG_IS_DEFAULT(ParallelGCThreads) ||
+           (ParallelGCThreads > 0),
+           "ParallelGCThreads should be set before flag initialization");

    // The survivor ratio's are calculated "raw", unlike the
    // default gc, which adds 2 to the ratio value. We need to

--- a/src/share/vm/runtime/arguments.cpp
+++ b/src/share/vm/runtime/arguments.cpp
@@ -924,10 +924,18 @@ static void no_shared_spaces() {
 void Arguments::set_parnew_gc_flags() {
  assert(!UseSerialGC && !UseParallelGC, "control point invariant");

+  // Turn off AdaptiveSizePolicy by default for parnew until it is
+  // complete.
+  if (UseParNewGC &&
+      FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) {
+    FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false);
+  }
+
  if (FLAG_IS_DEFAULT(UseParNewGC) && ParallelGCThreads > 1) {
    FLAG_SET_DEFAULT(UseParNewGC, true);
  } else if (UseParNewGC && ParallelGCThreads == 0) {
-    FLAG_SET_DEFAULT(ParallelGCThreads, nof_parallel_gc_threads());
+    FLAG_SET_DEFAULT(ParallelGCThreads,
+                     Abstract_VM_Version::parallel_worker_threads());
    if (FLAG_IS_DEFAULT(ParallelGCThreads) && ParallelGCThreads == 1) {
      FLAG_SET_DEFAULT(UseParNewGC, false);
    }
@@ -956,25 +964,6 @@ void Arguments::set_parnew_gc_flags() {
  }
 }

-// CAUTION: this code is currently shared by UseParallelGC, UseParNewGC and
-// UseconcMarkSweepGC. Further tuning of individual collectors might
-// dictate refinement on a per-collector basis.
-int Arguments::nof_parallel_gc_threads() {
-  if (FLAG_IS_DEFAULT(ParallelGCThreads)) {
-    // For very large machines, there are diminishing returns
-    // for large numbers of worker threads.  Instead of
-    // hogging the whole system, use 5/8ths of a worker for every
-    // processor after the first 8.  For example, on a 72 cpu
-    // machine use 8 + (72 - 8) * (5/8) == 48 worker threads.
-    // This is just a start and needs further tuning and study in
-    // Tiger.
-    int ncpus = os::active_processor_count();
-    return (ncpus <= 8) ? ncpus : 3 + ((ncpus * 5) / 8);
-  } else {
-    return ParallelGCThreads;
-  }
-}
-
 // Adjust some sizes to suit CMS and/or ParNew needs; these work well on
 // sparc/solaris for certain applications, but would gain from
 // further optimization and tuning efforts, and would almost
@@ -984,26 +973,24 @@ void Arguments::set_cms_and_parnew_gc_flags() {
    return;
  }

+  assert(UseConcMarkSweepGC, "CMS is expected to be on here");
+
  // If we are using CMS, we prefer to UseParNewGC,
  // unless explicitly forbidden.
-  if (UseConcMarkSweepGC && !UseParNewGC && FLAG_IS_DEFAULT(UseParNewGC)) {
-    FLAG_SET_DEFAULT(UseParNewGC, true);
+  if (!UseParNewGC && FLAG_IS_DEFAULT(UseParNewGC)) {
+    FLAG_SET_ERGO(bool, UseParNewGC, true);
  }

  // Turn off AdaptiveSizePolicy by default for cms until it is
-  // complete.  Also turn it off in general if the
-  // parnew collector has been selected.
-  if ((UseConcMarkSweepGC || UseParNewGC) &&
-      FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) {
+  // complete.
+  if (FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) {
    FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false);
  }

  // In either case, adjust ParallelGCThreads and/or UseParNewGC
  // as needed.
-  set_parnew_gc_flags();
-
-  if (!UseConcMarkSweepGC) {
-    return;
+  if (UseParNewGC) {
+    set_parnew_gc_flags();
  }

  // Now make adjustments for CMS
@@ -1147,17 +1134,11 @@ void Arguments::set_ergonomics_flags() {
        FLAG_IS_DEFAULT(UseParallelGC)) {
      if (should_auto_select_low_pause_collector()) {
        FLAG_SET_ERGO(bool, UseConcMarkSweepGC, true);
-        set_cms_and_parnew_gc_flags();
      } else {
        FLAG_SET_ERGO(bool, UseParallelGC, true);
      }
      no_shared_spaces();
    }
-
-    // This is here because the parallel collector could
-    // have been selected so this initialization should
-    // still be done.
-    set_parallel_gc_flags();
  }
 }

@@ -1170,6 +1151,9 @@ void Arguments::set_parallel_gc_flags() {
  // If no heap maximum was requested explicitly, use some reasonable fraction
  // of the physical memory, up to a maximum of 1GB.
  if (UseParallelGC) {
+    FLAG_SET_ERGO(uintx, ParallelGCThreads,
+                  Abstract_VM_Version::parallel_worker_threads());
+
    if (FLAG_IS_DEFAULT(MaxHeapSize)) {
      const uint64_t reasonable_fraction =
        os::physical_memory() / DefaultMaxRAMFraction;
@@ -1312,6 +1296,31 @@ static bool verify_serial_gc_flags() {
          UseParallelOldGC));
 }

+// Check consistency of GC selection
+bool Arguments::check_gc_consistency() {
+  bool status = true;
+  // Ensure that the user has not selected conflicting sets
+  // of collectors. [Note: this check is merely a user convenience;
+  // collectors over-ride each other so that only a non-conflicting
+  // set is selected; however what the user gets is not what they
+  // may have expected from the combination they asked for. It's
+  // better to reduce user confusion by not allowing them to
+  // select conflicting combinations.
+  uint i = 0;
+  if (UseSerialGC)                       i++;
+  if (UseConcMarkSweepGC || UseParNewGC) i++;
+  if (UseParallelGC || UseParallelOldGC) i++;
+  if (i > 1) {
+    jio_fprintf(defaultStream::error_stream(),
+                "Conflicting collector combinations in option list; "
+                "please refer to the release notes for the combinations "
+                "allowed\n");
+    status = false;
+  }
+
+  return status;
+}
+
 // Check the consistency of vm_init_args
 bool Arguments::check_vm_args_consistency() {
  // Method for adding checks for flag consistency.
@@ -1354,14 +1363,14 @@ bool Arguments::check_vm_args_consistency() {
    status = false;
  }

-  status &= verify_percentage(MaxLiveObjectEvacuationRatio,
+  status = status && verify_percentage(MaxLiveObjectEvacuationRatio,
                              "MaxLiveObjectEvacuationRatio");
-  status &= verify_percentage(AdaptiveSizePolicyWeight,
+  status = status && verify_percentage(AdaptiveSizePolicyWeight,
                              "AdaptiveSizePolicyWeight");
-  status &= verify_percentage(AdaptivePermSizeWeight, "AdaptivePermSizeWeight");
-  status &= verify_percentage(ThresholdTolerance, "ThresholdTolerance");
-  status &= verify_percentage(MinHeapFreeRatio, "MinHeapFreeRatio");
-  status &= verify_percentage(MaxHeapFreeRatio, "MaxHeapFreeRatio");
+  status = status && verify_percentage(AdaptivePermSizeWeight, "AdaptivePermSizeWeight");
+  status = status && verify_percentage(ThresholdTolerance, "ThresholdTolerance");
+  status = status && verify_percentage(MinHeapFreeRatio, "MinHeapFreeRatio");
+  status = status && verify_percentage(MaxHeapFreeRatio, "MaxHeapFreeRatio");

  if (MinHeapFreeRatio > MaxHeapFreeRatio) {
    jio_fprintf(defaultStream::error_stream(),
@@ -1377,14 +1386,14 @@ bool Arguments::check_vm_args_consistency() {
    MarkSweepAlwaysCompactCount = 1;  // Move objects every gc.
  }

-  status &= verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit");
-  status &= verify_percentage(GCTimeLimit, "GCTimeLimit");
+  status = status && verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit");
+  status = status && verify_percentage(GCTimeLimit, "GCTimeLimit");
  if (GCTimeLimit == 100) {
    // Turn off gc-overhead-limit-exceeded checks
    FLAG_SET_DEFAULT(UseGCOverheadLimit, false);
  }

-  status &= verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit");
+  status = status && verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit");

  // Check user specified sharing option conflict with Parallel GC
  bool cannot_share = (UseConcMarkSweepGC || UseParallelGC ||
@@ -1402,24 +1411,7 @@ bool Arguments::check_vm_args_consistency() {
    }
  }

-  // Ensure that the user has not selected conflicting sets
-  // of collectors. [Note: this check is merely a user convenience;
-  // collectors over-ride each other so that only a non-conflicting
-  // set is selected; however what the user gets is not what they
-  // may have expected from the combination they asked for. It's
-  // better to reduce user confusion by not allowing them to
-  // select conflicting combinations.
-  uint i = 0;
-  if (UseSerialGC)                       i++;
-  if (UseConcMarkSweepGC || UseParNewGC) i++;
-  if (UseParallelGC || UseParallelOldGC) i++;
-  if (i > 1) {
-    jio_fprintf(defaultStream::error_stream(),
-                "Conflicting collector combinations in option list; "
-                "please refer to the release notes for the combinations "
-                "allowed\n");
-    status = false;
-  }
+  status = status && check_gc_consistency();

  if (_has_alloc_profile) {
    if (UseParallelGC || UseParallelOldGC) {
@@ -1451,15 +1443,15 @@ bool Arguments::check_vm_args_consistency() {
                  "allocation buffers\n(-XX:+UseTLAB).\n");
      status = false;
    } else {
-      status &= verify_percentage(CMSIncrementalDutyCycle,
+      status = status && verify_percentage(CMSIncrementalDutyCycle,
                                  "CMSIncrementalDutyCycle");
-      status &= verify_percentage(CMSIncrementalDutyCycleMin,
+      status = status && verify_percentage(CMSIncrementalDutyCycleMin,
                                  "CMSIncrementalDutyCycleMin");
-      status &= verify_percentage(CMSIncrementalSafetyFactor,
+      status = status && verify_percentage(CMSIncrementalSafetyFactor,
                                  "CMSIncrementalSafetyFactor");
-      status &= verify_percentage(CMSIncrementalOffset,
+      status = status && verify_percentage(CMSIncrementalOffset,
                                  "CMSIncrementalOffset");
-      status &= verify_percentage(CMSExpAvgFactor,
+      status = status && verify_percentage(CMSExpAvgFactor,
                                  "CMSExpAvgFactor");
      // If it was not set on the command line, set
      // CMSInitiatingOccupancyFraction to 1 so icms can initiate cycles early.
@@ -2064,7 +2056,8 @@ jint Arguments::parse_each_vm_init_arg(const JavaVMInitArgs* args,

      // Enable parallel GC and adaptive generation sizing
      FLAG_SET_CMDLINE(bool, UseParallelGC, true);
-      FLAG_SET_DEFAULT(ParallelGCThreads, nof_parallel_gc_threads());
+      FLAG_SET_DEFAULT(ParallelGCThreads,
+                       Abstract_VM_Version::parallel_worker_threads());

      // Encourage steady state memory management
      FLAG_SET_CMDLINE(uintx, ThresholdTolerance, 100);
@@ -2451,15 +2444,25 @@ jint Arguments::parse(const JavaVMInitArgs* args) {
  no_shared_spaces();
 #endif // KERNEL

-  // Set some flags for ParallelGC if needed.
-  set_parallel_gc_flags();
-
-  // Set some flags for CMS and/or ParNew collectors, as needed.
-  set_cms_and_parnew_gc_flags();
-
  // Set flags based on ergonomics.
  set_ergonomics_flags();

+  // Check the GC selections again.
+  if (!check_gc_consistency()) {
+    return JNI_EINVAL;
+  }
+
+  if (UseParallelGC) {
+    // Set some flags for ParallelGC if needed.
+    set_parallel_gc_flags();
+  } else if (UseConcMarkSweepGC) {
+    // Set some flags for CMS
+    set_cms_and_parnew_gc_flags();
+  } else if (UseParNewGC) {
+    // Set some flags for ParNew
+    set_parnew_gc_flags();
+  }
+
 #ifdef SERIALGC
  assert(verify_serial_gc_flags(), "SerialGC unset");
 #endif // SERIALGC
@@ -2479,6 +2482,12 @@ jint Arguments::parse(const JavaVMInitArgs* args) {
    CommandLineFlags::printSetFlags();
  }

+#ifdef ASSERT
+  if (PrintFlagsFinal) {
+    CommandLineFlags::printFlags();
+  }
+#endif
+
  return JNI_OK;
 }


--- a/src/share/vm/runtime/arguments.hpp
+++ b/src/share/vm/runtime/arguments.hpp
@@ -291,8 +291,6 @@ class Arguments : AllStatic {
  static bool _CIDynamicCompilePriority;
  static intx _Tier2CompileThreshold;

-  // GC processing
-  static int nof_parallel_gc_threads();
  // CMS/ParNew garbage collectors
  static void set_parnew_gc_flags();
  static void set_cms_and_parnew_gc_flags();
@@ -385,6 +383,8 @@ class Arguments : AllStatic {
 public:
  // Parses the arguments
  static jint parse(const JavaVMInitArgs* args);
+  // Check for consistency in the selection of the garbage collector.
+  static bool check_gc_consistency();
  // Check consistecy or otherwise of VM argument settings
  static bool check_vm_args_consistency();
  // Used by os_solaris

--- a/src/share/vm/runtime/globals.hpp
+++ b/src/share/vm/runtime/globals.hpp
@@ -1794,6 +1794,9 @@ class CommandLineFlags {
          "number of times a GC thread (minus the coordinator) "            \
          "will sleep while yielding before giving up and resuming GC")     \
                                                                            \
+  notproduct(bool, PrintFlagsFinal, false,                                  \
+          "Print all command line flags after argument processing")         \
+                                                                            \
  /* gc tracing */                                                          \
  manageable(bool, PrintGC, false,                                          \
          "Print message at garbage collect")                               \

--- a/src/share/vm/runtime/vm_version.cpp
+++ b/src/share/vm/runtime/vm_version.cpp
@@ -52,6 +52,8 @@ int Abstract_VM_Version::_vm_major_version = 0;
 int Abstract_VM_Version::_vm_minor_version = 0;
 int Abstract_VM_Version::_vm_build_number = 0;
 bool Abstract_VM_Version::_initialized = false;
+int Abstract_VM_Version::_parallel_worker_threads = 0;
+bool Abstract_VM_Version::_parallel_worker_threads_initialized = false;

 void Abstract_VM_Version::initialize() {
  if (_initialized) {
@@ -210,3 +212,43 @@ void VM_Version_init() {
  }
 #endif
 }
+
+unsigned int Abstract_VM_Version::nof_parallel_worker_threads(
+                                                      unsigned int num,
+                                                      unsigned int den,
+                                                      unsigned int switch_pt) {
+  if (FLAG_IS_DEFAULT(ParallelGCThreads)) {
+    assert(ParallelGCThreads == 0, "Default ParallelGCThreads is not 0");
+    // For very large machines, there are diminishing returns
+    // for large numbers of worker threads.  Instead of
+    // hogging the whole system, use a fraction of the workers for every
+    // processor after the first 8.  For example, on a 72 cpu machine
+    // and a chosen fraction of 5/8
+    // use 8 + (72 - 8) * (5/8) == 48 worker threads.
+    unsigned int ncpus = (unsigned int) os::active_processor_count();
+    return (ncpus <= switch_pt) ?
+           ncpus :
+          (switch_pt + ((ncpus - switch_pt) * num) / den);
+  } else {
+    return ParallelGCThreads;
+  }
+}
+
+unsigned int Abstract_VM_Version::calc_parallel_worker_threads() {
+  return nof_parallel_worker_threads(5, 8, 8);
+}
+
+
+// Does not set the _initialized flag since it is
+// a global flag.
+unsigned int Abstract_VM_Version::parallel_worker_threads() {
+  if (!_parallel_worker_threads_initialized) {
+    if (FLAG_IS_DEFAULT(ParallelGCThreads)) {
+      _parallel_worker_threads = VM_Version::calc_parallel_worker_threads();
+    } else {
+      _parallel_worker_threads = ParallelGCThreads;
+    }
+    _parallel_worker_threads_initialized = true;
+  }
+  return _parallel_worker_threads;
+}
--- a/src/share/vm/runtime/vm_version.hpp
+++ b/src/share/vm/runtime/vm_version.hpp
@@ -36,6 +36,12 @@ class Abstract_VM_Version: AllStatic {
  static int          _vm_minor_version;
  static int          _vm_build_number;
  static bool         _initialized;
+  static int          _parallel_worker_threads;
+  static bool         _parallel_worker_threads_initialized;
+
+  static unsigned int nof_parallel_worker_threads(unsigned int num,
+                                                  unsigned int dem,
+                                                  unsigned int switch_pt);
 public:
  static void initialize();

@@ -69,4 +75,13 @@ class Abstract_VM_Version: AllStatic {
  // subclasses should define new versions to hide this one as needed.  Note
  // that the O/S may support more sizes, but at most this many are used.
  static uint page_size_count() { return 2; }
+
+  // Returns the number of parallel threads to be used for VM
+  // work.  If that number has not been calculated, do so and
+  // save it.  Returns ParallelGCThreads if it is set on the
+  // command line.
+  static unsigned int parallel_worker_threads();
+  // Calculates and returns the number of parallel threads.  May
+  // be VM version specific.
+  static unsigned int calc_parallel_worker_threads();
 };