From 87e611ef8b9b553211efdb8e0a98973076b14076 Mon Sep 17 00:00:00 2001
From: "tongbao.ztb" <tongbao.ztb@alibaba-inc.com>
Date: Sun, 10 Mar 2019 20:05:03 +0800
Subject: [PATCH] [Misc] print the detail time when processing the gc roots
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
- port D64010 to dragonwell
- print the detail time when doing root processing in generational heap collection

Test Plan:
hotspot/test/gc/TestPrintGCRootsTraceTime.java
hotspot/test/gc/

Reviewers: 传胜

Differential Revision: https://aone.alibaba-inc.com/code/D851700
---
 .../concurrentMarkSweepGeneration.cpp         |  61 ++++-
 .../gc_implementation/g1/g1GCPhaseTimes.cpp   | 217 +-----------------
 .../gc_implementation/g1/g1GCPhaseTimes.hpp   |   3 +-
 .../parNew/parNewGeneration.cpp               |  14 +-
 .../gc_implementation/shared/gcTraceTime.cpp  | 204 +++++++++++++++-
 .../gc_implementation/shared/gcTraceTime.hpp  |  84 ++++++-
 .../shared/workerDataArray.cpp                |  41 ++++
 .../shared/workerDataArray.hpp                | 153 ++++++++++++
 .../shared/workerDataArray.inline.hpp         | 154 +++++++++++++
 src/share/vm/memory/collectorPolicy.cpp       |   7 +-
 src/share/vm/memory/collectorPolicy.hpp       |   7 +-
 src/share/vm/memory/defNewGeneration.cpp      |   8 +-
 src/share/vm/memory/genCollectedHeap.cpp      | 154 +++++++++----
 src/share/vm/memory/genCollectedHeap.hpp      |  15 +-
 src/share/vm/memory/genMarkSweep.cpp          |  32 ++-
 src/share/vm/runtime/globals_ext.hpp          |   3 +
 test/gc/TestPrintGCRootsTraceTime.java        | 192 ++++++++++++++++
 17 files changed, 1057 insertions(+), 292 deletions(-)
 create mode 100644 src/share/vm/gc_implementation/shared/workerDataArray.cpp
 create mode 100644 src/share/vm/gc_implementation/shared/workerDataArray.hpp
 create mode 100644 src/share/vm/gc_implementation/shared/workerDataArray.inline.hpp
 create mode 100644 test/gc/TestPrintGCRootsTraceTime.java

diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
index 1872e72e3..58d5d3402 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@@ -3058,6 +3058,8 @@ void CMSCollector::verify_after_remark_work_1() {
   HandleMark  hm;
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
+  GenGCPhaseTimes* phase_times = gch->gen_policy()->phase_times();
+
   // Get a clear set of claim bits for the roots processing to work with.
   ClassLoaderDataGraph::clear_claimed_marks();
 
@@ -3065,6 +3067,8 @@ void CMSCollector::verify_after_remark_work_1() {
   MarkRefsIntoClosure notOlder(_span, verification_mark_bm());
   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
 
+  phase_times->note_gc_start(1);
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerStart, 0, os::elapsedTime());
   gch->gen_process_roots(_cmsGen->level(),
                          true,   // younger gens are roots
                          true,   // activate StrongRootsScope
@@ -3072,7 +3076,10 @@ void CMSCollector::verify_after_remark_work_1() {
                          should_unload_classes(),
                          &notOlder,
                          NULL,
-                         NULL);  // SSS: Provide correct closure
+                         NULL,   // SSS: Provide correct closure
+                         phase_times,
+                         0);
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerEnd, 0, os::elapsedTime());
 
   // Now mark from the roots
   MarkFromRootsClosure markFromRootsClosure(this, _span,
@@ -3123,6 +3130,7 @@ void CMSCollector::verify_after_remark_work_2() {
   HandleMark  hm;
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
+  GenGCPhaseTimes* phase_times = gch->gen_policy()->phase_times();
   // Get a clear set of claim bits for the roots processing to work with.
   ClassLoaderDataGraph::clear_claimed_marks();
 
@@ -3133,6 +3141,8 @@ void CMSCollector::verify_after_remark_work_2() {
 
   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
 
+  phase_times->note_gc_start(1);
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerStart, 0, os::elapsedTime());
   gch->gen_process_roots(_cmsGen->level(),
                          true,   // younger gens are roots
                          true,   // activate StrongRootsScope
@@ -3140,8 +3150,10 @@ void CMSCollector::verify_after_remark_work_2() {
                          should_unload_classes(),
                          &notOlder,
                          NULL,
-                         &cld_closure);
-
+                         &cld_closure,
+                         phase_times,
+                         0);
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerEnd, 0, os::elapsedTime());
   // Now mark from the roots
   MarkFromRootsVerifyClosure markFromRootsClosure(this, _span,
     verification_mark_bm(), markBitMap(), verification_mark_stack());
@@ -3715,6 +3727,7 @@ void CMSCollector::checkpointRootsInitialWork(bool asynch) {
   // Update the saved marks which may affect the root scans.
   gch->save_marks();
 
+  GenGCPhaseTimes* phase_times = gch->gen_policy()->phase_times();
   // weak reference processing has not started yet.
   ref_processor()->set_enqueuing_is_done(false);
 
@@ -3737,6 +3750,7 @@ void CMSCollector::checkpointRootsInitialWork(bool asynch) {
       FlexibleWorkGang* workers = gch->workers();
       assert(workers != NULL, "Need parallel worker threads.");
       int n_workers = workers->active_workers();
+      phase_times->note_gc_start(n_workers);
       CMSParInitialMarkTask tsk(this, n_workers);
       gch->set_par_threads(n_workers);
       initialize_sequential_subtasks_for_young_gen_rescan(n_workers);
@@ -3752,6 +3766,8 @@ void CMSCollector::checkpointRootsInitialWork(bool asynch) {
       // The serial version.
       CLDToOopClosure cld_closure(&notOlder, true);
       gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
+      phase_times->note_gc_start(1);
+      phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerStart, 0, os::elapsedTime());
       gch->gen_process_roots(_cmsGen->level(),
                              true,   // younger gens are roots
                              true,   // activate StrongRootsScope
@@ -3759,7 +3775,14 @@ void CMSCollector::checkpointRootsInitialWork(bool asynch) {
                              should_unload_classes(),
                              &notOlder,
                              NULL,
-                             &cld_closure);
+                             &cld_closure,
+                             phase_times,
+                             0);
+      phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerEnd, 0, os::elapsedTime());
+    }
+    phase_times->note_gc_end();
+    if (PrintGCRootsTraceTime && PrintGCDetails) {
+      phase_times->log_gc_details();
     }
   }
 
@@ -5234,6 +5257,8 @@ void CMSParInitialMarkTask::work(uint worker_id) {
   // ---------- scan from roots --------------
   _timer.start();
   GenCollectedHeap* gch = GenCollectedHeap::heap();
+  GenGCPhaseTimes* phase_times = gch->gen_policy()->phase_times();
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerStart, worker_id, os::elapsedTime());
   Par_MarkRefsIntoClosure par_mri_cl(_collector->_span, &(_collector->_markBitMap));
 
   // ---------- young gen roots --------------
@@ -5260,7 +5285,9 @@ void CMSParInitialMarkTask::work(uint worker_id) {
                          _collector->should_unload_classes(),
                          &par_mri_cl,
                          NULL,
-                         &cld_closure);
+                         &cld_closure,
+                         phase_times,
+                         worker_id);
   assert(_collector->should_unload_classes()
          || (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
@@ -5270,6 +5297,7 @@ void CMSParInitialMarkTask::work(uint worker_id) {
       "Finished remaining root initial mark scan work in %dth thread: %3.3f sec",
       worker_id, _timer.seconds());
   }
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerEnd, worker_id, os::elapsedTime());
 }
 
 // Parallel remark task
@@ -5366,6 +5394,8 @@ void CMSParRemarkTask::work(uint worker_id) {
   // ---------- rescan from roots --------------
   _timer.start();
   GenCollectedHeap* gch = GenCollectedHeap::heap();
+  GenGCPhaseTimes* phase_times = gch->gen_policy()->phase_times();
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerStart, worker_id, os::elapsedTime());
   Par_MarkRefsIntoAndScanClosure par_mrias_cl(_collector,
     _collector->_span, _collector->ref_processor(),
     &(_collector->_markBitMap),
@@ -5396,7 +5426,9 @@ void CMSParRemarkTask::work(uint worker_id) {
                          _collector->should_unload_classes(),
                          &par_mrias_cl,
                          NULL,
-                         NULL);     // The dirty klasses will be handled below
+                         NULL,      // The dirty klasses will be handled below
+                         phase_times,
+                         worker_id);
 
   assert(_collector->should_unload_classes()
          || (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
@@ -5481,6 +5513,7 @@ void CMSParRemarkTask::work(uint worker_id) {
       "Finished work stealing in %dth thread: %3.3f sec",
       worker_id, _timer.seconds());
   }
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerEnd, worker_id, os::elapsedTime());
 }
 
 // Note that parameter "i" is not used.
@@ -5898,6 +5931,8 @@ void CMSCollector::do_remark_parallel() {
   // claimed by the parallel threads.
   cms_space->initialize_sequential_subtasks_for_rescan(n_workers);
 
+  GenGCPhaseTimes* phase_times = gch->gen_policy()->phase_times();
+  phase_times->note_gc_start(n_workers);
   // It turns out that even when we're using 1 thread, doing the work in a
   // separate thread causes wide variance in run times.  We can't help this
   // in the multi-threaded case, but we special-case n=1 here to get
@@ -5915,6 +5950,11 @@ void CMSCollector::do_remark_parallel() {
     tsk.work(0);
   }
 
+  phase_times->note_gc_end();
+  if (PrintGCRootsTraceTime && PrintGCDetails) {
+    phase_times->log_gc_details();
+  }
+
   gch->set_par_threads(0);  // 0 ==> non-parallel.
   // restore, single-threaded for now, any preserved marks
   // as a result of work_q overflow
@@ -5927,7 +5967,7 @@ void CMSCollector::do_remark_non_parallel() {
   HandleMark   hm;
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), false);
-
+  GenGCPhaseTimes* phase_times = gch->gen_policy()->phase_times();
   MarkRefsIntoAndScanClosure
     mrias_cl(_span, ref_processor(), &_markBitMap, NULL /* not precleaning */,
              &_markStack, this,
@@ -5981,6 +6021,8 @@ void CMSCollector::do_remark_non_parallel() {
     gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
     GenCollectedHeap::StrongRootsScope srs(gch);
 
+    phase_times->note_gc_start(1);
+    phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerStart, 0, os::elapsedTime());
     gch->gen_process_roots(_cmsGen->level(),
                            true,  // younger gens as roots
                            false, // use the local StrongRootsScope
@@ -5988,7 +6030,10 @@ void CMSCollector::do_remark_non_parallel() {
                            should_unload_classes(),
                            &mrias_cl,
                            NULL,
-                           NULL); // The dirty klasses will be handled below
+                           NULL,  // The dirty klasses will be handled below
+                           phase_times,
+                           0);
+    phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerEnd, 0, os::elapsedTime());
 
     assert(should_unload_classes()
            || (roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
index d43976dd6..fcf09270a 100644
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,223 +27,10 @@
 #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
+#include "gc_implementation/shared/workerDataArray.inline.hpp"
 #include "memory/allocation.hpp"
 #include "runtime/os.hpp"
 
-// Helper class for avoiding interleaved logging
-class LineBuffer: public StackObj {
-
-private:
-  static const int BUFFER_LEN = 1024;
-  static const int INDENT_CHARS = 3;
-  char _buffer[BUFFER_LEN];
-  int _indent_level;
-  int _cur;
-
-  void vappend(const char* format, va_list ap)  ATTRIBUTE_PRINTF(2, 0) {
-    int res = os::vsnprintf(&_buffer[_cur], BUFFER_LEN - _cur, format, ap);
-    if (res > BUFFER_LEN) {
-      DEBUG_ONLY(warning("buffer too small in LineBuffer");)
-      _buffer[BUFFER_LEN -1] = 0;
-      _cur = BUFFER_LEN; // vsnprintf above should not add to _buffer if we are called again
-    } else if (res != -1) {
-      _cur += res;
-    }
-  }
-
-public:
-  explicit LineBuffer(int indent_level): _indent_level(indent_level), _cur(0) {
-    for (; (_cur < BUFFER_LEN && _cur < (_indent_level * INDENT_CHARS)); _cur++) {
-      _buffer[_cur] = ' ';
-    }
-  }
-
-#ifndef PRODUCT
-  ~LineBuffer() {
-    assert(_cur == _indent_level * INDENT_CHARS, "pending data in buffer - append_and_print_cr() not called?");
-  }
-#endif
-
-  void append(const char* format, ...)  ATTRIBUTE_PRINTF(2, 3) {
-    va_list ap;
-    va_start(ap, format);
-    vappend(format, ap);
-    va_end(ap);
-  }
-
-  void print_cr() {
-    gclog_or_tty->print_cr("%s", _buffer);
-    _cur = _indent_level * INDENT_CHARS;
-  }
-
-  void append_and_print_cr(const char* format, ...)  ATTRIBUTE_PRINTF(2, 3) {
-    va_list ap;
-    va_start(ap, format);
-    vappend(format, ap);
-    va_end(ap);
-    print_cr();
-  }
-};
-
-template <class T>
-class WorkerDataArray  : public CHeapObj<mtGC> {
-  friend class G1GCParPhasePrinter;
-  T*          _data;
-  uint        _length;
-  const char* _title;
-  bool        _print_sum;
-  int         _log_level;
-  uint        _indent_level;
-  bool        _enabled;
-
-  WorkerDataArray<size_t>* _thread_work_items;
-
-  NOT_PRODUCT(T uninitialized();)
-
-  // We are caching the sum and average to only have to calculate them once.
-  // This is not done in an MT-safe way. It is intended to allow single
-  // threaded code to call sum() and average() multiple times in any order
-  // without having to worry about the cost.
-  bool   _has_new_data;
-  T      _sum;
-  T      _min;
-  T      _max;
-  double _average;
-
- public:
-  WorkerDataArray(uint length, const char* title, bool print_sum, int log_level, uint indent_level) :
-    _title(title), _length(0), _print_sum(print_sum), _log_level(log_level), _indent_level(indent_level),
-    _has_new_data(true), _thread_work_items(NULL), _enabled(true) {
-    assert(length > 0, "Must have some workers to store data for");
-    _length = length;
-    _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
-  }
-
-  ~WorkerDataArray() {
-    FREE_C_HEAP_ARRAY(T, _data, mtGC);
-  }
-
-  void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items) {
-    _thread_work_items = thread_work_items;
-  }
-
-  WorkerDataArray<size_t>* thread_work_items() { return _thread_work_items; }
-
-  void set(uint worker_i, T value) {
-    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
-    assert(_data[worker_i] == WorkerDataArray<T>::uninitialized(), err_msg("Overwriting data for worker %d in %s", worker_i, _title));
-    _data[worker_i] = value;
-    _has_new_data = true;
-  }
-
-  void set_thread_work_item(uint worker_i, size_t value) {
-    assert(_thread_work_items != NULL, "No sub count");
-    _thread_work_items->set(worker_i, value);
-  }
-
-  T get(uint worker_i) {
-    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
-    assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data added for worker %d", worker_i));
-    return _data[worker_i];
-  }
-
-  void add(uint worker_i, T value) {
-    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
-    assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data to add to for worker %d", worker_i));
-    _data[worker_i] += value;
-    _has_new_data = true;
-  }
-
-  double average(uint active_threads){
-    calculate_totals(active_threads);
-    return _average;
-  }
-
-  T sum(uint active_threads) {
-    calculate_totals(active_threads);
-    return _sum;
-  }
-
-  T minimum(uint active_threads) {
-    calculate_totals(active_threads);
-    return _min;
-  }
-
-  T maximum(uint active_threads) {
-    calculate_totals(active_threads);
-    return _max;
-  }
-
-  void reset() PRODUCT_RETURN;
-  void verify(uint active_threads) PRODUCT_RETURN;
-
-  void set_enabled(bool enabled) { _enabled = enabled; }
-
-  int log_level() { return _log_level;  }
-
- private:
-
-  void calculate_totals(uint active_threads){
-    if (!_has_new_data) {
-      return;
-    }
-
-    _sum = (T)0;
-    _min = _data[0];
-    _max = _min;
-    assert(active_threads <= _length, "Wrong number of active threads");
-    for (uint i = 0; i < active_threads; ++i) {
-      T val = _data[i];
-      _sum += val;
-      _min = MIN2(_min, val);
-      _max = MAX2(_max, val);
-    }
-    _average = (double)_sum / (double)active_threads;
-    _has_new_data = false;
-  }
-};
-
-
-#ifndef PRODUCT
-
-template <>
-size_t WorkerDataArray<size_t>::uninitialized() {
-  return (size_t)-1;
-}
-
-template <>
-double WorkerDataArray<double>::uninitialized() {
-  return -1.0;
-}
-
-template <class T>
-void WorkerDataArray<T>::reset() {
-  for (uint i = 0; i < _length; i++) {
-    _data[i] = WorkerDataArray<T>::uninitialized();
-  }
-  if (_thread_work_items != NULL) {
-    _thread_work_items->reset();
-  }
-}
-
-template <class T>
-void WorkerDataArray<T>::verify(uint active_threads) {
-  if (!_enabled) {
-    return;
-  }
-
-  assert(active_threads <= _length, "Wrong number of active threads");
-  for (uint i = 0; i < active_threads; i++) {
-    assert(_data[i] != WorkerDataArray<T>::uninitialized(),
-        err_msg("Invalid data for worker %u in '%s'", i, _title));
-  }
-  if (_thread_work_items != NULL) {
-    _thread_work_items->verify(active_threads);
-  }
-}
-
-#endif
-
 G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
   _max_gc_threads(max_gc_threads)
 {
diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
index 83e7235c4..e7d6e40b9 100644
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
 
 #include "memory/allocation.hpp"
+#include "gc_implementation/shared/workerDataArray.hpp"
 
 class LineBuffer;
 
diff --git a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
index 80c355c96..2728986a7 100644
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
@@ -598,6 +598,9 @@ void ParNewGenTask::set_for_termination(int active_workers) {
 
 void ParNewGenTask::work(uint worker_id) {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
+  GenGCPhaseTimes* phase_times = gch->gen_policy()->phase_times();
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerStart, worker_id, os::elapsedTime());
+
   // Since this is being done in a separate thread, need new resource
   // and handle marks.
   ResourceMark rm;
@@ -627,12 +630,15 @@ void ParNewGenTask::work(uint worker_id) {
                          GenCollectedHeap::StrongAndWeakRoots,
                          &par_scan_state.to_space_root_closure(),
                          &par_scan_state.older_gen_closure(),
-                         &cld_scan_closure);
+                         &cld_scan_closure,
+                         phase_times,
+                         worker_id);
 
   par_scan_state.end_strong_roots();
 
   // "evacuate followers".
   par_scan_state.evacuate_followers_closure().do_void();
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerEnd, worker_id, os::elapsedTime());
 }
 
 #ifdef _MSC_VER
@@ -919,6 +925,7 @@ void ParNewGeneration::collect(bool   full,
 
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
+  GenGCPhaseTimes* phase_times = gch->gen_policy()->phase_times();
   _gc_timer->register_gc_start();
 
   assert(gch->kind() == CollectedHeap::GenCollectedHeap,
@@ -931,6 +938,7 @@ void ParNewGeneration::collect(bool   full,
                                    workers->active_workers(),
                                    Threads::number_of_non_daemon_threads());
   workers->set_active_workers(active_workers);
+  phase_times->note_gc_start(active_workers);
   assert(gch->n_gens() == 2,
          "Par collection currently only works with single older gen.");
   _next_gen = gch->next_gen(this);
@@ -999,6 +1007,10 @@ void ParNewGeneration::collect(bool   full,
   thread_state_set.reset(0 /* Bad value in debug if not reset */,
                          promotion_failed());
 
+  phase_times->note_gc_end();
+  if (PrintGCRootsTraceTime && PrintGCDetails) {
+    phase_times->log_gc_details();
+  }
   // Process (weak) reference objects found during scavenge.
   ReferenceProcessor* rp = ref_processor();
   IsAliveClosure is_alive(this);
diff --git a/src/share/vm/gc_implementation/shared/gcTraceTime.cpp b/src/share/vm/gc_implementation/shared/gcTraceTime.cpp
index 3aed30bd5..b166bac2a 100644
--- a/src/share/vm/gc_implementation/shared/gcTraceTime.cpp
+++ b/src/share/vm/gc_implementation/shared/gcTraceTime.cpp
@@ -26,6 +26,7 @@
 #include "gc_implementation/shared/gcTimer.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
+#include "gc_implementation/shared/workerDataArray.inline.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/os.hpp"
 #include "runtime/safepoint.hpp"
@@ -34,7 +35,6 @@
 #include "utilities/ostream.hpp"
 #include "utilities/ticks.inline.hpp"
 
-
 GCTraceTime::GCTraceTime(const char* title, bool doit, bool print_cr, GCTimer* timer, GCId gc_id) :
     _title(title), _doit(doit), _print_cr(print_cr), _timer(timer), _start_counter() {
   if (_doit || _timer != NULL) {
@@ -82,3 +82,205 @@ GCTraceTime::~GCTraceTime() {
     gclog_or_tty->flush();
   }
 }
+
+//========================================================================
+// GenGC Log level class
+class GenGCLog : public AllStatic {
+ public:
+  typedef enum {
+    LevelNone,
+    LevelFine,
+    LevelFinest,
+  } LogLevel;
+
+ private:
+  static LogLevel _level;
+ public:
+  inline static bool fine() {
+    return _level >= LevelFine;
+  }
+
+  inline static bool finest() {
+    return _level == LevelFinest;
+  }
+
+  static LogLevel level() {
+    return _level;
+  }
+};
+
+GenGCLog::LogLevel GenGCLog::_level = GenGCLog::LevelNone;
+
+GenGCPhaseTimes::GenGCPhaseTimes(uint max_gc_threads) :
+  _max_gc_threads(max_gc_threads)
+{
+  assert(max_gc_threads > 0, "Must have some GC threads");
+
+  _gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Start (ms)", false, GenGCLog::LevelFine, 2);
+  _gc_par_phases[RootProcess] = new WorkerDataArray<double>(max_gc_threads, "Root Processing (ms)", true, GenGCLog::LevelFine, 2);
+
+  // Root scanning phases
+  _gc_par_phases[ThreadRoots] = new WorkerDataArray<double>(max_gc_threads, "Thread Roots (ms)", true, GenGCLog::LevelFinest, 3);
+  _gc_par_phases[StringTableRoots] = new WorkerDataArray<double>(max_gc_threads, "StringTable Roots (ms)", true, GenGCLog::LevelFinest, 3);
+  _gc_par_phases[UniverseRoots] = new WorkerDataArray<double>(max_gc_threads, "Universe Roots (ms)", true, GenGCLog::LevelFinest, 3);
+  _gc_par_phases[JNIRoots] = new WorkerDataArray<double>(max_gc_threads, "JNI Handles Roots (ms)", true, GenGCLog::LevelFinest, 3);
+  _gc_par_phases[ObjectSynchronizerRoots] = new WorkerDataArray<double>(max_gc_threads, "ObjectSynchronizer Roots (ms)", true, GenGCLog::LevelFinest, 3);
+  _gc_par_phases[FlatProfilerRoots] = new WorkerDataArray<double>(max_gc_threads, "FlatProfiler Roots (ms)", true, GenGCLog::LevelFinest, 3);
+  _gc_par_phases[ManagementRoots] = new WorkerDataArray<double>(max_gc_threads, "Management Roots (ms)", true, GenGCLog::LevelFinest, 3);
+  _gc_par_phases[SystemDictionaryRoots] = new WorkerDataArray<double>(max_gc_threads, "SystemDictionary Roots (ms)", true, GenGCLog::LevelFinest, 3);
+  _gc_par_phases[CLDGRoots] = new WorkerDataArray<double>(max_gc_threads, "CLDG Roots (ms)", true, GenGCLog::LevelFinest, 3);
+  _gc_par_phases[JVMTIRoots] = new WorkerDataArray<double>(max_gc_threads, "JVMTI Roots (ms)", true, GenGCLog::LevelFinest, 3);
+  _gc_par_phases[CodeCacheRoots] = new WorkerDataArray<double>(max_gc_threads, "CodeCache Roots (ms)", true, GenGCLog::LevelFinest, 3);
+
+  _gc_par_phases[OldGenScan] = new WorkerDataArray<double>(max_gc_threads, "older-gen scanning (ms)", true, GenGCLog::LevelFine, 3);
+
+  _gc_par_phases[Other] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Other (ms)", true, GenGCLog::LevelFine,     2);
+  _gc_par_phases[GCWorkerTotal] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Total (ms)", true, GenGCLog::LevelFine, 2);
+  _gc_par_phases[GCWorkerEnd] = new WorkerDataArray<double>(max_gc_threads, "GC Worker End (ms)", false, GenGCLog::LevelFine, 2);
+}
+
+void GenGCPhaseTimes::note_gc_start(uint active_gc_threads) {
+  assert(active_gc_threads > 0, "The number of threads must be > 0");
+  assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max number of threads");
+  _active_gc_threads = active_gc_threads;
+
+  for (int i = 0; i < GCParPhasesSentinel; i++) {
+    _gc_par_phases[i]->reset();
+  }
+}
+
+void GenGCPhaseTimes::note_gc_end() {
+  for (uint i = 0; i < _active_gc_threads; i++) {
+    double worker_time = _gc_par_phases[GCWorkerEnd]->get(i) - _gc_par_phases[GCWorkerStart]->get(i);
+    record_time_secs(GCWorkerTotal, i , worker_time);
+
+    double worker_known_time = _gc_par_phases[RootProcess]->get(i);
+    record_time_secs(Other, i, worker_time - worker_known_time);
+  }
+
+  for (int i = 0; i < GCParPhasesSentinel; i++) {
+    _gc_par_phases[i]->verify(_active_gc_threads);
+  }
+}
+
+void GenGCPhaseTimes::print_stats(int level, const char* str, double value) {
+  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
+}
+
+void GenGCPhaseTimes::print_stats(int level, const char* str, double value, uint workers) {
+  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: %u]", str, value, workers);
+}
+
+// record the time a phase took in seconds
+void GenGCPhaseTimes::record_time_secs(GCParPhases phase, uint worker_i, double secs) {
+  _gc_par_phases[phase]->set(worker_i, secs);
+}
+
+// add a number of seconds to a phase
+void GenGCPhaseTimes::add_time_secs(GCParPhases phase, uint worker_i, double secs) {
+  _gc_par_phases[phase]->add(worker_i, secs);
+}
+
+// return the average time for a phase in milliseconds
+double GenGCPhaseTimes::average_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->average(_active_gc_threads) * 1000.0;
+}
+
+double GenGCPhaseTimes::get_time_ms(GCParPhases phase, uint worker_i) {
+  return _gc_par_phases[phase]->get(worker_i) * 1000.0;
+}
+
+double GenGCPhaseTimes::sum_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->sum(_active_gc_threads) * 1000.0;
+}
+
+double GenGCPhaseTimes::min_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->minimum(_active_gc_threads) * 1000.0;
+}
+
+double GenGCPhaseTimes::max_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->maximum(_active_gc_threads) * 1000.0;
+}
+
+double GenGCPhaseTimes::average_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items()->average(_active_gc_threads);
+}
+
+class GenGCParPhasePrinter : public StackObj {
+  GenGCPhaseTimes* _phase_times;
+ public:
+  GenGCParPhasePrinter(GenGCPhaseTimes* phase_times) : _phase_times(phase_times) {}
+
+  void print(GenGCPhaseTimes::GCParPhases phase_id) {
+    WorkerDataArray<double>* phase = _phase_times->_gc_par_phases[phase_id];
+
+    if (phase->_length == 1) {
+      print_single_length(phase_id, phase);
+    } else {
+      print_multi_length(phase_id, phase);
+    }
+  }
+
+ private:
+
+  void print_single_length(GenGCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
+    // No need for min, max, average and sum for only one worker
+    LineBuffer buf(phase->_indent_level);
+    buf.append_and_print_cr("[%s:  %.1lf]", phase->_title, _phase_times->get_time_ms(phase_id, 0));
+  }
+
+  void print_time_values(LineBuffer& buf, GenGCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
+    uint active_length = _phase_times->_active_gc_threads;
+    for (uint i = 0; i < active_length; ++i) {
+      buf.append("  %.1lf", _phase_times->get_time_ms(phase_id, i));
+    }
+    buf.print_cr();
+  }
+
+  void print_multi_length(GenGCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
+    LineBuffer buf(phase->_indent_level);
+    buf.append("[%s:", phase->_title);
+
+    print_time_values(buf, phase_id, phase);
+
+    buf.append(" Min: %.1lf, Avg: %.1lf, Max: %.1lf, Diff: %.1lf",
+        _phase_times->min_time_ms(phase_id), _phase_times->average_time_ms(phase_id), _phase_times->max_time_ms(phase_id),
+        _phase_times->max_time_ms(phase_id) - _phase_times->min_time_ms(phase_id));
+
+    if (phase->_print_sum) {
+      // for things like the start and end times the sum is not
+      // that relevant
+      buf.append(", Sum: %.1lf", _phase_times->sum_time_ms(phase_id));
+    }
+
+    buf.append_and_print_cr("]");
+  }
+};
+
+void GenGCPhaseTimes::print() {
+  GenGCParPhasePrinter par_phase_printer(this);
+
+  for (int i = 0; i <= GCMainParPhasesLast; i++) {
+    par_phase_printer.print((GCParPhases) i);
+  }
+}
+
+void GenGCPhaseTimes::log_gc_details() {
+  this->print();
+  gclog_or_tty->flush();
+}
+
+GenGCParPhaseTimesTracker::GenGCParPhaseTimesTracker(GenGCPhaseTimes* phase_times, GenGCPhaseTimes::GCParPhases phase, uint worker_id) :
+    _phase_times(phase_times), _phase(phase), _worker_id(worker_id) {
+  if (_phase_times != NULL) {
+    _start_time = os::elapsedTime();
+  }
+}
+
+GenGCParPhaseTimesTracker::~GenGCParPhaseTimesTracker() {
+  if (_phase_times != NULL) {
+    _phase_times->record_time_secs(_phase, _worker_id, os::elapsedTime() - _start_time);
+  }
+}
+
diff --git a/src/share/vm/gc_implementation/shared/gcTraceTime.hpp b/src/share/vm/gc_implementation/shared/gcTraceTime.hpp
index 30e494baa..b92a8da28 100644
--- a/src/share/vm/gc_implementation/shared/gcTraceTime.hpp
+++ b/src/share/vm/gc_implementation/shared/gcTraceTime.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,8 +28,11 @@
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "prims/jni_md.h"
 #include "utilities/ticks.hpp"
+#include "gc_implementation/shared/workerDataArray.hpp"
 
 class GCTimer;
+class LineBuffer;
+template <class T> class WorkerDataArray;
 
 class GCTraceTime {
   const char* _title;
@@ -43,4 +46,83 @@ class GCTraceTime {
   ~GCTraceTime();
 };
 
+class GenGCPhaseTimes : public CHeapObj<mtGC> {
+  friend class GenGCParPhasePrinter;
+
+  uint _active_gc_threads;
+  uint _max_gc_threads;
+
+ public:
+  enum GCParPhases {
+    GCWorkerStart,
+    RootProcess,
+    ThreadRoots,
+    StringTableRoots,
+    UniverseRoots,
+    JNIRoots,
+    ObjectSynchronizerRoots,
+    FlatProfilerRoots,
+    ManagementRoots,
+    SystemDictionaryRoots,
+    CLDGRoots,
+    CodeCacheRoots,
+    JVMTIRoots,
+    OldGenScan,
+    Other,
+    GCWorkerTotal,
+    GCWorkerEnd,
+    GCParPhasesSentinel
+  };
+private:
+  // Markers for grouping the phases in the GCPhases enum above
+  static const int GCMainParPhasesLast = GCWorkerEnd;
+
+  WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel];
+
+  double _cur_collection_par_time_ms;
+
+  // Helper methods for detailed logging
+  void print_stats(int level, const char* str, double value);
+  void print_stats(int level, const char* str, double value, uint workers);
+
+ public:
+  GenGCPhaseTimes(uint max_gc_threads);
+  void note_gc_start(uint active_gc_threads);
+  void note_gc_end();
+  void print();
+  void log_gc_details();
+
+  // record the time a phase took in seconds
+  void record_time_secs(GCParPhases phase, uint worker_i, double secs);
+
+  // add a number of seconds to a phase
+  void add_time_secs(GCParPhases phase, uint worker_i, double secs);
+
+  // return the average time for a phase in milliseconds
+  double average_time_ms(GCParPhases phase);
+
+ private:
+  double get_time_ms(GCParPhases phase, uint worker_i);
+  double sum_time_ms(GCParPhases phase);
+  double min_time_ms(GCParPhases phase);
+  double max_time_ms(GCParPhases phase);
+  double average_thread_work_items(GCParPhases phase);
+
+ public:
+
+  void record_par_time(double ms) {
+    _cur_collection_par_time_ms = ms;
+  }
+};
+
+class GenGCParPhaseTimesTracker : public StackObj {
+  double _start_time;
+  GenGCPhaseTimes::GCParPhases _phase;
+  GenGCPhaseTimes* _phase_times;
+  uint _worker_id;
+public:
+  GenGCParPhaseTimesTracker(GenGCPhaseTimes* phase_times, GenGCPhaseTimes::GCParPhases phase, uint worker_id);
+  ~GenGCParPhaseTimesTracker();
+};
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_GCTRACETIME_HPP
diff --git a/src/share/vm/gc_implementation/shared/workerDataArray.cpp b/src/share/vm/gc_implementation/shared/workerDataArray.cpp
new file mode 100644
index 000000000..fc3f03f15
--- /dev/null
+++ b/src/share/vm/gc_implementation/shared/workerDataArray.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/shared/workerDataArray.inline.hpp"
+#include "utilities/ostream.hpp"
+
+#ifndef PRODUCT
+
+template <>
+size_t WorkerDataArray<size_t>::uninitialized() {
+  return (size_t)-1;
+}
+
+template <>
+double WorkerDataArray<double>::uninitialized() {
+  return -1.0;
+}
+
+#endif
diff --git a/src/share/vm/gc_implementation/shared/workerDataArray.hpp b/src/share/vm/gc_implementation/shared/workerDataArray.hpp
new file mode 100644
index 000000000..eb76636a7
--- /dev/null
+++ b/src/share/vm/gc_implementation/shared/workerDataArray.hpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_WORKERDATAARRAY_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_SHARED_WORKERDATAARRAY_HPP
+
+#include "memory/allocation.hpp"
+#include "utilities/debug.hpp"
+
+// These classes (WorkerDataArray and LineBuffer) are moved here from the original gc_implementation/g1/g1GCPhaseTimes.cpp
+// And can cache the data for the parallel gc workers' performance data in G1GC and ParNew + CMS GC
+
+template <class T>
+class WorkerDataArray  : public CHeapObj<mtGC> {
+  friend class G1GCParPhasePrinter;
+  friend class GenGCParPhasePrinter;
+  T*          _data;
+  uint        _length;
+  const char* _title;
+  bool        _print_sum;
+  int         _log_level;
+  uint        _indent_level;
+  bool        _enabled;
+
+  WorkerDataArray<size_t>* _thread_work_items;
+
+  NOT_PRODUCT(T uninitialized();)
+
+  // We are caching the sum and average to only have to calculate them once.
+  // This is not done in an MT-safe way. It is intended to allow single
+  // threaded code to call sum() and average() multiple times in any order
+  // without having to worry about the cost.
+  bool   _has_new_data;
+  T      _sum;
+  T      _min;
+  T      _max;
+  double _average;
+
+ public:
+  WorkerDataArray(uint length, const char* title, bool print_sum, int log_level, uint indent_level);
+
+  ~WorkerDataArray();
+
+  void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items);
+
+  WorkerDataArray<size_t>* thread_work_items() { return _thread_work_items; }
+
+  void set(uint worker_i, T value);
+
+  void set_thread_work_item(uint worker_i, size_t value);
+
+  T get(uint worker_i);
+
+  void add(uint worker_i, T value);
+
+  double average(uint active_threads);
+
+  T sum(uint active_threads);
+
+  T minimum(uint active_threads);
+
+  T maximum(uint active_threads);
+
+  void reset() PRODUCT_RETURN;
+  void verify(uint active_threads) PRODUCT_RETURN;
+
+  void set_enabled(bool enabled) { _enabled = enabled; }
+
+  int log_level() { return _log_level;  }
+
+ private:
+
+  void calculate_totals(uint active_threads);
+};
+
+//=========================================================================
+// Helper class for avoiding interleaved logging
+class LineBuffer: public StackObj {
+
+private:
+  static const int BUFFER_LEN = 1024;
+  static const int INDENT_CHARS = 3;
+  char _buffer[BUFFER_LEN];
+  int _indent_level;
+  int _cur;
+
+  void vappend(const char* format, va_list ap)  ATTRIBUTE_PRINTF(2, 0) {
+    int res = vsnprintf(&_buffer[_cur], BUFFER_LEN - _cur, format, ap);
+    if (res != -1) {
+      _cur += res;
+    } else {
+      DEBUG_ONLY(warning("buffer too small in LineBuffer");)
+      _buffer[BUFFER_LEN -1] = 0;
+      _cur = BUFFER_LEN; // vsnprintf above should not add to _buffer if we are called again
+    }
+  }
+
+public:
+  explicit LineBuffer(int indent_level): _indent_level(indent_level), _cur(0) {
+    for (; (_cur < BUFFER_LEN && _cur < (_indent_level * INDENT_CHARS)); _cur++) {
+      _buffer[_cur] = ' ';
+    }
+  }
+ 
+#ifndef PRODUCT
+  ~LineBuffer() {
+    assert(_cur == _indent_level * INDENT_CHARS, "pending data in buffer - append_and_print_cr() not called?");
+  }
+#endif
+
+  void append(const char* format, ...)  ATTRIBUTE_PRINTF(2, 3) {
+    va_list ap;
+    va_start(ap, format);
+    vappend(format, ap);
+    va_end(ap);
+  }
+
+  void print_cr() {
+    gclog_or_tty->print_cr("%s", _buffer);
+    _cur = _indent_level * INDENT_CHARS;
+  }
+
+  void append_and_print_cr(const char* format, ...)  ATTRIBUTE_PRINTF(2, 3) {
+    va_list ap;
+    va_start(ap, format);
+    vappend(format, ap);
+    va_end(ap);
+    print_cr();
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_WORKERDATAARRAY_HPP
diff --git a/src/share/vm/gc_implementation/shared/workerDataArray.inline.hpp b/src/share/vm/gc_implementation/shared/workerDataArray.inline.hpp
new file mode 100644
index 000000000..119901cba
--- /dev/null
+++ b/src/share/vm/gc_implementation/shared/workerDataArray.inline.hpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_WORKERDATAARRAY_INLINE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_SHARED_WORKERDATAARRAY_INLINE_HPP
+
+#include "gc_implementation/shared/workerDataArray.hpp"
+#include "memory/allocation.hpp"
+#include "utilities/debug.hpp"
+#include "runtime/os.hpp"
+#include "utilities/ostream.hpp"
+
+template <class T>
+WorkerDataArray<T>::WorkerDataArray(uint length, const char* title, bool print_sum, int log_level, uint indent_level):
+  _title(title), _length(0), _print_sum(print_sum), _log_level(log_level), _indent_level(indent_level),
+  _has_new_data(true), _thread_work_items(NULL), _enabled(true) {
+  assert(length > 0, "Must have some workers to store data for");
+  _length = length;
+  _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
+}
+
+template <class T>
+WorkerDataArray<T>::~WorkerDataArray() {
+  FREE_C_HEAP_ARRAY(T, _data, mtGC);
+}
+
+template <class T>
+void WorkerDataArray<T>::link_thread_work_items(WorkerDataArray<size_t>* thread_work_items) {
+  _thread_work_items = thread_work_items;
+}
+
+template <class T>
+void WorkerDataArray<T>::set(uint worker_i, T value) {
+  assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
+  assert(_data[worker_i] == WorkerDataArray<T>::uninitialized(), err_msg("Overwriting data for worker %d in %s", worker_i, _title));
+  _data[worker_i] = value;
+  _has_new_data = true;
+}
+
+template <class T>
+void WorkerDataArray<T>::set_thread_work_item(uint worker_i, size_t value) {
+  assert(_thread_work_items != NULL, "No sub count");
+  _thread_work_items->set(worker_i, value);
+}
+
+template <class T>
+T WorkerDataArray<T>::get(uint worker_i) {
+  assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
+  assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data added for worker %d", worker_i));
+  return _data[worker_i];
+}
+
+template <class T>
+void WorkerDataArray<T>::add(uint worker_i, T value) {
+  assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
+  assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data to add to for worker %d", worker_i));
+  _data[worker_i] += value;
+  _has_new_data = true;
+}
+
+template <class T>
+double WorkerDataArray<T>::average(uint active_threads){
+  calculate_totals(active_threads);
+  return _average;
+}
+
+template <class T>
+T WorkerDataArray<T>::sum(uint active_threads) {
+  calculate_totals(active_threads);
+  return _sum;
+}
+
+template <class T>
+T WorkerDataArray<T>::minimum(uint active_threads) {
+  calculate_totals(active_threads);
+  return _min;
+}
+
+template <class T>
+T WorkerDataArray<T>::maximum(uint active_threads) {
+  calculate_totals(active_threads);
+  return _max;
+}
+
+template <class T>
+void WorkerDataArray<T>::calculate_totals(uint active_threads){
+  if (!_has_new_data) {
+    return;
+  }
+
+  _sum = (T)0;
+  _min = _data[0];
+  _max = _min;
+  assert(active_threads <= _length, "Wrong number of active threads");
+  for (uint i = 0; i < active_threads; ++i) {
+    T val = _data[i];
+    _sum += val;
+    _min = MIN2(_min, val);
+    _max = MAX2(_max, val);
+  }
+  _average = (double)_sum / (double)active_threads;
+  _has_new_data = false;
+}
+
+#ifndef PRODUCT
+
+template <class T>
+void WorkerDataArray<T>::reset() {
+  for (uint i = 0; i < _length; i++) {
+    _data[i] = WorkerDataArray<T>::uninitialized();
+  }
+  if (_thread_work_items != NULL) {
+    _thread_work_items->reset();
+  }
+}
+
+template <class T>
+void WorkerDataArray<T>::verify(uint active_threads) {
+  if (!_enabled) {
+    return;
+  }
+
+  assert(active_threads <= _length, "Wrong number of active threads");
+  for (uint i = 0; i < active_threads; i++) {
+    assert(_data[i] != WorkerDataArray<T>::uninitialized(),
+        err_msg("Invalid data for worker %u in '%s'", i, _title));
+  }
+  if (_thread_work_items != NULL) {
+    _thread_work_items->verify(active_threads);
+  }
+}
+#endif // PRODUCT
+#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_WORKERDATAARRAY_INLINE_HPP
diff --git a/src/share/vm/memory/collectorPolicy.cpp b/src/share/vm/memory/collectorPolicy.cpp
index 727408d52..ad5e5004a 100644
--- a/src/share/vm/memory/collectorPolicy.cpp
+++ b/src/share/vm/memory/collectorPolicy.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -200,7 +200,10 @@ GenCollectorPolicy::GenCollectorPolicy() :
     _max_gen0_size(0),
     _gen_alignment(0),
     _generations(NULL)
-{}
+{
+  int parallel_gc_threads = CollectedHeap::use_parallel_gc_threads() ? ParallelGCThreads : 1;
+  _phase_times  = new GenGCPhaseTimes(parallel_gc_threads);
+}
 
 size_t GenCollectorPolicy::scale_by_NewRatio_aligned(size_t base_size) {
   return align_size_down_bounded(base_size / (NewRatio + 1), _gen_alignment);
diff --git a/src/share/vm/memory/collectorPolicy.hpp b/src/share/vm/memory/collectorPolicy.hpp
index e0982bafc..0e4a3d9a7 100644
--- a/src/share/vm/memory/collectorPolicy.hpp
+++ b/src/share/vm/memory/collectorPolicy.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,7 @@
 #include "memory/generationSpec.hpp"
 #include "memory/genRemSet.hpp"
 #include "utilities/macros.hpp"
+#include "gc_implementation/shared/gcTraceTime.hpp"
 
 // This class (or more correctly, subtypes of this class)
 // are used to define global garbage collector attributes.
@@ -232,6 +233,8 @@ friend class TestGenCollectorPolicy;
 
   GenerationSpec **_generations;
 
+  GenGCPhaseTimes* _phase_times;
+
   // Return true if an allocation should be attempted in the older
   // generation if it fails in the younger generation.  Return
   // false, otherwise.
@@ -260,6 +263,8 @@ friend class TestGenCollectorPolicy;
  public:
   GenCollectorPolicy();
 
+  GenGCPhaseTimes* phase_times()                  const { return _phase_times;                  }
+
   // Accessors
   size_t min_gen0_size()     { return _min_gen0_size; }
   size_t initial_gen0_size() { return _initial_gen0_size; }
diff --git a/src/share/vm/memory/defNewGeneration.cpp b/src/share/vm/memory/defNewGeneration.cpp
index 273d8a313..9b93dc8a7 100644
--- a/src/share/vm/memory/defNewGeneration.cpp
+++ b/src/share/vm/memory/defNewGeneration.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -625,6 +625,8 @@ void DefNewGeneration::collect(bool   full,
   assert(gch->no_allocs_since_save_marks(0),
          "save marks have not been newly set.");
 
+  GenGCPhaseTimes* phase_times = gch->gen_policy()->phase_times();
+  phase_times->note_gc_start(1);
   gch->gen_process_roots(_level,
                          true,  // Process younger gens, if any,
                                 // as strong roots.
@@ -633,7 +635,9 @@ void DefNewGeneration::collect(bool   full,
                          GenCollectedHeap::StrongAndWeakRoots,
                          &fsc_with_no_gc_barrier,
                          &fsc_with_gc_barrier,
-                         &cld_scan_closure);
+                         &cld_scan_closure,
+                         phase_times,
+                         0);
 
   // "evacuate followers".
   evacuate_followers.do_void();
diff --git a/src/share/vm/memory/genCollectedHeap.cpp b/src/share/vm/memory/genCollectedHeap.cpp
index ddf0a860c..b9a2446ee 100644
--- a/src/share/vm/memory/genCollectedHeap.cpp
+++ b/src/share/vm/memory/genCollectedHeap.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -619,7 +619,9 @@ void GenCollectedHeap::process_roots(bool activate_scope,
                                      OopClosure* weak_roots,
                                      CLDClosure* strong_cld_closure,
                                      CLDClosure* weak_cld_closure,
-                                     CodeBlobToOopClosure* code_roots) {
+                                     CodeBlobToOopClosure* code_roots,
+                                     GenGCPhaseTimes* phase_times,
+                                     uint worker_id) {
   StrongRootsScope srs(this, activate_scope);
 
   // General roots.
@@ -630,71 +632,117 @@ void GenCollectedHeap::process_roots(bool activate_scope,
   // could be trying to change the termination condition while the task
   // is executing in another GC worker.
 
-  if (!_process_strong_tasks->is_task_claimed(GCH_PS_ClassLoaderDataGraph_oops_do)) {
-    ClassLoaderDataGraph::roots_cld_do(strong_cld_closure, weak_cld_closure);
+  //CLDGRoots
+  {
+    GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::CLDGRoots, worker_id);
+    if (!_process_strong_tasks->is_task_claimed(GCH_PS_ClassLoaderDataGraph_oops_do)) {
+      ClassLoaderDataGraph::roots_cld_do(strong_cld_closure, weak_cld_closure);
+    }
   }
-
   // Some CLDs contained in the thread frames should be considered strong.
   // Don't process them if they will be processed during the ClassLoaderDataGraph phase.
   CLDClosure* roots_from_clds_p = (strong_cld_closure != weak_cld_closure) ? strong_cld_closure : NULL;
   // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway
   CodeBlobToOopClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots;
+  
+  // ThreadRoots
+  {
+    GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::ThreadRoots, worker_id);
+    Threads::possibly_parallel_oops_do(strong_roots, roots_from_clds_p, roots_from_code_p);
+  }
 
-  Threads::possibly_parallel_oops_do(strong_roots, roots_from_clds_p, roots_from_code_p);
-
-  if (!_process_strong_tasks->is_task_claimed(GCH_PS_Universe_oops_do)) {
-    Universe::oops_do(strong_roots);
+  //UniverseRoots
+  {
+    GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::UniverseRoots, worker_id);
+    if (!_process_strong_tasks->is_task_claimed(GCH_PS_Universe_oops_do)) {
+      Universe::oops_do(strong_roots);
+    }
   }
+
   // Global (strong) JNI handles
-  if (!_process_strong_tasks->is_task_claimed(GCH_PS_JNIHandles_oops_do)) {
-    JNIHandles::oops_do(strong_roots);
+  {
+    GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::JNIRoots, worker_id);
+    if (!_process_strong_tasks->is_task_claimed(GCH_PS_JNIHandles_oops_do)) {
+      JNIHandles::oops_do(strong_roots);
+    }
   }
 
-  if (!_process_strong_tasks->is_task_claimed(GCH_PS_ObjectSynchronizer_oops_do)) {
-    ObjectSynchronizer::oops_do(strong_roots);
+  // ObjectSynchronizerRoots
+  {
+    GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::ObjectSynchronizerRoots, worker_id);
+    if (!_process_strong_tasks->is_task_claimed(GCH_PS_ObjectSynchronizer_oops_do)) {
+      ObjectSynchronizer::oops_do(strong_roots);
+    }
   }
-  if (!_process_strong_tasks->is_task_claimed(GCH_PS_FlatProfiler_oops_do)) {
-    FlatProfiler::oops_do(strong_roots);
+
+  // FlatProfilerRoots
+  {
+    GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::FlatProfilerRoots, worker_id);
+    if (!_process_strong_tasks->is_task_claimed(GCH_PS_FlatProfiler_oops_do)) {
+      FlatProfiler::oops_do(strong_roots);
+    }
   }
-  if (!_process_strong_tasks->is_task_claimed(GCH_PS_Management_oops_do)) {
-    Management::oops_do(strong_roots);
+
+  // ManagementRoots
+  {
+    GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::ManagementRoots, worker_id);
+    if (!_process_strong_tasks->is_task_claimed(GCH_PS_Management_oops_do)) {
+      Management::oops_do(strong_roots);
+    }
   }
-  if (!_process_strong_tasks->is_task_claimed(GCH_PS_jvmti_oops_do)) {
-    JvmtiExport::oops_do(strong_roots);
+
+  // JVMTIRoots
+  {
+    GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::JVMTIRoots, worker_id);
+    if (!_process_strong_tasks->is_task_claimed(GCH_PS_jvmti_oops_do)) {
+      JvmtiExport::oops_do(strong_roots);
+    }
   }
 
-  if (!_process_strong_tasks->is_task_claimed(GCH_PS_SystemDictionary_oops_do)) {
-    SystemDictionary::roots_oops_do(strong_roots, weak_roots);
+  // SystemDictionaryRoots
+  {
+    GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::SystemDictionaryRoots, worker_id);
+    if (!_process_strong_tasks->is_task_claimed(GCH_PS_SystemDictionary_oops_do)) {
+      SystemDictionary::roots_oops_do(strong_roots, weak_roots);
+    }
   }
 
   // All threads execute the following. A specific chunk of buckets
   // from the StringTable are the individual tasks.
-  if (weak_roots != NULL) {
-    if (CollectedHeap::use_parallel_gc_threads()) {
-      StringTable::possibly_parallel_oops_do(weak_roots);
-    } else {
-      StringTable::oops_do(weak_roots);
+  // StringTableRoots
+  {
+    GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::StringTableRoots, worker_id);
+    if (weak_roots != NULL) {
+      if (CollectedHeap::use_parallel_gc_threads()) {
+        StringTable::possibly_parallel_oops_do(weak_roots);
+      } else {
+        StringTable::oops_do(weak_roots);
+      }
     }
   }
 
-  if (!_process_strong_tasks->is_task_claimed(GCH_PS_CodeCache_oops_do)) {
-    if (so & SO_ScavengeCodeCache) {
-      assert(code_roots != NULL, "must supply closure for code cache");
+  // CodeCacheRoots
+  {
+    GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::CodeCacheRoots, worker_id);
+    if (!_process_strong_tasks->is_task_claimed(GCH_PS_CodeCache_oops_do)) {
+      if (so & SO_ScavengeCodeCache) {
+        assert(code_roots != NULL, "must supply closure for code cache");
 
-      // We only visit parts of the CodeCache when scavenging.
-      CodeCache::scavenge_root_nmethods_do(code_roots);
-    }
-    if (so & SO_AllCodeCache) {
-      assert(code_roots != NULL, "must supply closure for code cache");
+        // We only visit parts of the CodeCache when scavenging.
+        CodeCache::scavenge_root_nmethods_do(code_roots);
+      }
+      if (so & SO_AllCodeCache) {
+        assert(code_roots != NULL, "must supply closure for code cache");
 
-      // CMSCollector uses this to do intermediate-strength collections.
-      // We scan the entire code cache, since CodeCache::do_unloading is not called.
-      CodeCache::blobs_do(code_roots);
+        // CMSCollector uses this to do intermediate-strength collections.
+        // We scan the entire code cache, since CodeCache::do_unloading is not called.
+        CodeCache::blobs_do(code_roots);
+      }
+      // Verify that the code cache contents are not subject to
+      // movement by a scavenging collection.
+      DEBUG_ONLY(CodeBlobToOopClosure assert_code_is_non_scavengable(&assert_is_non_scavengable_closure, !CodeBlobToOopClosure::FixRelocations));
+      DEBUG_ONLY(CodeCache::asserted_non_scavengable_nmethods_do(&assert_code_is_non_scavengable));
     }
-    // Verify that the code cache contents are not subject to
-    // movement by a scavenging collection.
-    DEBUG_ONLY(CodeBlobToOopClosure assert_code_is_non_scavengable(&assert_is_non_scavengable_closure, !CodeBlobToOopClosure::FixRelocations));
-    DEBUG_ONLY(CodeCache::asserted_non_scavengable_nmethods_do(&assert_code_is_non_scavengable));
   }
 
 }
@@ -706,9 +754,12 @@ void GenCollectedHeap::gen_process_roots(int level,
                                          bool only_strong_roots,
                                          OopsInGenClosure* not_older_gens,
                                          OopsInGenClosure* older_gens,
-                                         CLDClosure* cld_closure) {
+                                         CLDClosure* cld_closure,
+                                         GenGCPhaseTimes* phase_times,
+                                         uint worker_id) {
   const bool is_adjust_phase = !only_strong_roots && !younger_gens_as_roots;
 
+  GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::RootProcess, worker_id);
   bool is_moving_collection = false;
   if (level == 0 || is_adjust_phase) {
     // young collections are always moving
@@ -722,7 +773,7 @@ void GenCollectedHeap::gen_process_roots(int level,
   process_roots(activate_scope, so,
                 not_older_gens, weak_roots,
                 cld_closure, weak_cld_closure,
-                &mark_code_closure);
+                &mark_code_closure, phase_times, worker_id);
 
   if (younger_gens_as_roots) {
     if (!_process_strong_tasks->is_task_claimed(GCH_PS_younger_gens)) {
@@ -733,14 +784,17 @@ void GenCollectedHeap::gen_process_roots(int level,
       not_older_gens->reset_generation();
     }
   }
-  // When collection is parallel, all threads get to cooperate to do
-  // older-gen scanning.
-  for (int i = level+1; i < _n_gens; i++) {
-    older_gens->set_generation(_gens[i]);
-    rem_set()->younger_refs_iterate(_gens[i], older_gens);
-    older_gens->reset_generation();
-  }
 
+  {
+    GenGCParPhaseTimesTracker x(phase_times, GenGCPhaseTimes::OldGenScan, worker_id);
+    // When collection is parallel, all threads get to cooperate to do
+    // older-gen scanning.
+    for (int i = level+1; i < _n_gens; i++) { 
+      older_gens->set_generation(_gens[i]);
+      rem_set()->younger_refs_iterate(_gens[i], older_gens);
+      older_gens->reset_generation();
+    }
+  }
   _process_strong_tasks->all_tasks_completed();
 }
 
diff --git a/src/share/vm/memory/genCollectedHeap.hpp b/src/share/vm/memory/genCollectedHeap.hpp
index 1a2fcaa06..88449acec 100644
--- a/src/share/vm/memory/genCollectedHeap.hpp
+++ b/src/share/vm/memory/genCollectedHeap.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 #include "memory/collectorPolicy.hpp"
 #include "memory/generation.hpp"
 #include "memory/sharedHeap.hpp"
+#include "gc_implementation/shared/gcTraceTime.hpp"
 
 class SubTasksDone;
 
@@ -416,7 +417,9 @@ public:
                      OopClosure* weak_roots,
                      CLDClosure* strong_cld_closure,
                      CLDClosure* weak_cld_closure,
-                     CodeBlobToOopClosure* code_roots);
+                     CodeBlobToOopClosure* code_roots,
+                     GenGCPhaseTimes* phase_times,
+                     uint worker_id);
 
   void gen_process_roots(int level,
                          bool younger_gens_as_roots,
@@ -427,7 +430,9 @@ public:
                          OopsInGenClosure* older_gens,
                          CLDClosure* cld_closure,
                          CLDClosure* weak_cld_closure,
-                         CodeBlobClosure* code_closure);
+                         CodeBlobClosure* code_closure,
+                         GenGCPhaseTimes* phase_times,
+                         uint worker_id);
 
  public:
   static const bool StrongAndWeakRoots = false;
@@ -440,7 +445,9 @@ public:
                          bool only_strong_roots,
                          OopsInGenClosure* not_older_gens,
                          OopsInGenClosure* older_gens,
-                         CLDClosure* cld_closure);
+                         CLDClosure* cld_closure,
+                         GenGCPhaseTimes* phase_times,
+                         uint worker_id);
 
   // Apply "root_closure" to all the weak roots of the system.
   // These include JNI weak roots, string table,
diff --git a/src/share/vm/memory/genMarkSweep.cpp b/src/share/vm/memory/genMarkSweep.cpp
index c5dc4947c..73b66113b 100644
--- a/src/share/vm/memory/genMarkSweep.cpp
+++ b/src/share/vm/memory/genMarkSweep.cpp
@@ -197,7 +197,7 @@ void GenMarkSweep::mark_sweep_phase1(int level,
   trace(" 1");
 
   GenCollectedHeap* gch = GenCollectedHeap::heap();
-
+  GenGCPhaseTimes* phase_times = gch->gen_policy()->phase_times();
   // Because follow_root_closure is created statically, cannot
   // use OopsInGenClosure constructor which takes a generation,
   // as the Universe has not been created when the static constructors
@@ -206,7 +206,8 @@ void GenMarkSweep::mark_sweep_phase1(int level,
 
   // Need new claim bits before marking starts.
   ClassLoaderDataGraph::clear_claimed_marks();
-
+  phase_times->note_gc_start(1);
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerStart, 0, os::elapsedTime());
   gch->gen_process_roots(level,
                          false, // Younger gens are not roots.
                          true,  // activate StrongRootsScope
@@ -214,8 +215,10 @@ void GenMarkSweep::mark_sweep_phase1(int level,
                          ClassUnloading,
                          &follow_root_closure,
                          &follow_root_closure,
-                         &follow_cld_closure);
-
+                         &follow_cld_closure,
+                         phase_times,
+                         0);
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerEnd, 0, os::elapsedTime());
   // Process reference objects found during marking
   {
     ref_processor()->setup_policy(clear_all_softrefs);
@@ -225,6 +228,11 @@ void GenMarkSweep::mark_sweep_phase1(int level,
     gc_tracer()->report_gc_reference_stats(stats);
   }
 
+  phase_times->note_gc_end();
+  if (PrintGCRootsTraceTime && PrintGCDetails) {
+    phase_times->log_gc_details();
+  }
+
   // This is the point where the entire marking should have completed.
   assert(_marking_stack.is_empty(), "Marking should have completed");
 
@@ -246,7 +254,6 @@ void GenMarkSweep::mark_sweep_phase1(int level,
   gc_tracer()->report_object_count_after_gc(&is_alive);
 }
 
-
 void GenMarkSweep::mark_sweep_phase2() {
   // Now all live objects are marked, compute the new object addresses.
 
@@ -279,6 +286,8 @@ public:
 void GenMarkSweep::mark_sweep_phase3(int level) {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
+  GenGCPhaseTimes* phase_times = gch->gen_policy()->phase_times();
+  
   // Adjust the pointers to reflect the new locations
   GCTraceTime tm("phase 3", PrintGC && Verbose, true, _gc_timer, _gc_tracer->gc_id());
   trace("3");
@@ -292,6 +301,9 @@ void GenMarkSweep::mark_sweep_phase3(int level) {
   // are run.
   adjust_pointer_closure.set_orig_generation(gch->get_gen(level));
 
+  phase_times->note_gc_start(1);
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerStart, 0, os::elapsedTime());
+
   gch->gen_process_roots(level,
                          false, // Younger gens are not roots.
                          true,  // activate StrongRootsScope
@@ -299,7 +311,15 @@ void GenMarkSweep::mark_sweep_phase3(int level) {
                          GenCollectedHeap::StrongAndWeakRoots,
                          &adjust_pointer_closure,
                          &adjust_pointer_closure,
-                         &adjust_cld_closure);
+                         &adjust_cld_closure,
+                         phase_times,
+                         0);
+
+  phase_times->record_time_secs(GenGCPhaseTimes::GCWorkerEnd, 0, os::elapsedTime());
+  phase_times->note_gc_end();
+  if (PrintGCRootsTraceTime && PrintGCDetails) {
+    phase_times->log_gc_details();
+  }
 
   gch->gen_process_weak_roots(&adjust_pointer_closure);
 
diff --git a/src/share/vm/runtime/globals_ext.hpp b/src/share/vm/runtime/globals_ext.hpp
index 2d290e27c..733be2d82 100644
--- a/src/share/vm/runtime/globals_ext.hpp
+++ b/src/share/vm/runtime/globals_ext.hpp
@@ -30,6 +30,9 @@
                                                                                 \
   manageable(bool, PrintYoungGenHistoAfterParNewGC, false,                      \
             "print the young generation class histogram after parNew GC")       \
+                                                                                \
+  manageable(bool, PrintGCRootsTraceTime, false,                                \
+             "Print GC Trace Time")                                             \
 
   //add new AJVM specific flags here
 
diff --git a/test/gc/TestPrintGCRootsTraceTime.java b/test/gc/TestPrintGCRootsTraceTime.java
new file mode 100644
index 000000000..1e1157d97
--- /dev/null
+++ b/test/gc/TestPrintGCRootsTraceTime.java
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2019 Alibaba Group Holding Limited. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Alibaba designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+/*
+ * @test TestPrintGCDetailTraceTime
+ * @summary test print gc detail time
+ * @library /testlibrary
+ * @build TestPrintGCRootsTraceTime
+ * @run main/othervm TestPrintGCRootsTraceTime
+ */
+
+import java.util.ArrayList;
+import com.oracle.java.testlibrary.*;
+
+public class TestPrintGCRootsTraceTime {
+    public static void main(String[] args) throws Exception {
+        ProcessBuilder pb = null;
+        OutputAnalyzer output = null;
+        // test with the flag on
+        pb = ProcessTools.createJavaProcessBuilder( "-Xmx500m",
+                "-Xmx200m",
+                "-XX:+UseParNewGC",
+                "-XX:+UseConcMarkSweepGC",
+                "-XX:+PrintGCDetails",
+                "-verbose:gc",
+                "-XX:+PrintGCRootsTraceTime",
+                Foo.class.getName()
+                );
+        output = new OutputAnalyzer(pb.start());
+        output.shouldContain("Root Processing");
+        String [] strOut = output.getStdout().split("\n");
+        for (int i = 0; i < 11; i++) {
+            System.out.println(strOut[i]);
+        }
+        output.shouldHaveExitValue(0);
+
+        // test with the flag off
+        pb = ProcessTools.createJavaProcessBuilder( "-Xmx500m",
+                "-Xmn200m",
+                "-XX:+UseParNewGC",
+                "-XX:+UseConcMarkSweepGC",
+                "-XX:+PrintGCDetails",
+                "-verbose:gc",
+                "-XX:-PrintGCRootsTraceTime",
+                Foo.class.getName()
+                );
+        output = new OutputAnalyzer(pb.start());
+        output.shouldNotContain("Root Processing");
+        output.shouldHaveExitValue(0);
+
+        // test serial gc
+        pb = ProcessTools.createJavaProcessBuilder( "-Xmx500m",
+                "-Xmn200m",
+                "-XX:+UseSerialGC",
+                "-XX:+PrintGCDetails",
+                "-XX:+PrintGCRootsTraceTime",
+                "-verbose:gc",
+                Foo.class.getName());
+        output = new OutputAnalyzer(pb.start());
+        output.shouldNotContain("Root Processing");
+        output.shouldHaveExitValue(0);
+
+        // test g1 gc
+        pb = ProcessTools.createJavaProcessBuilder( "-Xmx500m",
+                "-Xmn200m",
+                "-XX:+UseG1GC",
+                "-XX:+PrintGCDetails",
+                "-XX:+PrintGCRootsTraceTime",
+                "-verbose:gc",
+                Foo.class.getName());
+        output = new OutputAnalyzer(pb.start());
+        output.shouldNotContain("Root Processing");
+        output.shouldHaveExitValue(0);
+
+        // test ps gc
+        pb = ProcessTools.createJavaProcessBuilder( "-Xmx500m",
+                "-Xmn200m",
+                "-XX:+UseParallelGC",
+                "-XX:+PrintGCDetails",
+                "-XX:+PrintGCRootsTraceTime",
+                "-verbose:gc",
+                Foo.class.getName());
+        output = new OutputAnalyzer(pb.start());
+        output.shouldNotContain("Root Processing");
+        output.shouldHaveExitValue(0);
+
+        // test ps young gc
+        pb = ProcessTools.createJavaProcessBuilder( "-Xmx500m",
+                "-Xmn200m",
+                "-XX:+UseParallelGC",
+                "-XX:-UseParallelOldGC",
+                "-XX:+PrintGCDetails",
+                "-XX:+PrintGCRootsTraceTime",
+                "-verbose:gc",
+                Foo.class.getName());
+        output = new OutputAnalyzer(pb.start());
+        output.shouldNotContain("Root Processing"); 
+        output.shouldHaveExitValue(0);
+
+        // test cms without ParNew
+         pb = ProcessTools.createJavaProcessBuilder( "-Xmx500m",
+                "-Xmn200m",
+                "-XX:+UseConcMarkSweepGC",
+                "-XX:-UseParNewGC",
+                "-XX:+PrintGCDetails",
+                "-XX:+PrintGCRootsTraceTime",
+                "-verbose:gc",
+                Foo.class.getName());
+        output = new OutputAnalyzer(pb.start());
+        output.shouldNotContain("Root Processing"); 
+        output.shouldHaveExitValue(0);
+
+        // test cms
+         pb = ProcessTools.createJavaProcessBuilder( "-Xmx500m",
+                "-Xmn200m",
+                "-XX:+UseConcMarkSweepGC",
+                "-XX:+PrintGCDetails",
+                "-XX:+PrintGCRootsTraceTime",
+                "-verbose:gc",
+                Foo.class.getName());
+        output = new OutputAnalyzer(pb.start());
+        output.shouldContain("Root Processing");
+        output.shouldHaveExitValue(0);
+
+        // test cms with parallel gc threads = 1
+        pb = ProcessTools.createJavaProcessBuilder( "-Xmx500m",
+                "-Xmn200m",
+                "-XX:+UseConcMarkSweepGC",
+                "-XX:+PrintGCDetails",
+                "-XX:ParallelGCThreads=1",
+                "-XX:+PrintGCRootsTraceTime",
+                "-verbose:gc",
+                Foo.class.getName());
+        output = new OutputAnalyzer(pb.start());
+        output.shouldContain("Root Processing");
+        output.shouldHaveExitValue(0);
+
+        // test system gc
+         pb = ProcessTools.createJavaProcessBuilder( "-Xmx500m",
+                "-Xmn200m",
+                "-XX:+PrintGCDetails",
+                "-XX:+PrintGCRootsTraceTime",
+                "-verbose:gc",
+                TestSystemGC.class.getName());
+        output = new OutputAnalyzer(pb.start());
+        output.shouldHaveExitValue(0);
+    }
+
+    // class for testing gc
+    public static class Foo {
+        public static void main(String []args) {
+            int count = 0;
+            ArrayList list = new ArrayList();
+            for (int i = 0; ; i++) {
+                byte[] data = new byte[1024];
+                list.add(data);
+                if (i % 1024 == 0) {
+                    count++;
+                    list.clear();
+                    if (count > 1024) {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+    // class for testing system gc
+    public static class TestSystemGC {
+        public static void main(String []args) throws Exception {
+            System.gc();
+        }
+    }
+}
-- 
GitLab