diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
index dfcb2c20b8dd26dc028581ad66cab2799afff402..b679db2097177288809d9f88dab1d8ced77eb3f4 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@@ -2299,11 +2299,11 @@ void G1CollectedHeap::iterate_dirty_card_closure(CardTableEntryClosure* cl,
   hot_card_cache->drain(worker_i, g1_rem_set(), into_cset_dcq);
 
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-  int n_completed_buffers = 0;
+  size_t n_completed_buffers = 0;
   while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) {
     n_completed_buffers++;
   }
-  g1_policy()->phase_times()->record_update_rs_processed_buffers(worker_i, n_completed_buffers);
+  g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers);
   dcqs.clear_n_completed_buffers();
   assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!");
 }
@@ -3912,10 +3912,10 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
 
     TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty);
 
-    int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+    uint active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                                 workers()->active_workers() : 1);
     double pause_start_sec = os::elapsedTime();
-    g1_policy()->phase_times()->note_gc_start(active_workers);
+    g1_policy()->phase_times()->note_gc_start(active_workers, mark_in_progress());
     log_gc_header();
 
     TraceCollectorStats tcs(g1mm()->incremental_collection_counters());
@@ -4664,8 +4664,7 @@ public:
   void work(uint worker_id) {
     if (worker_id >= _n_workers) return;  // no work needed this round
 
-    double start_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->phase_times()->record_gc_worker_start_time(worker_id, start_time_ms);
+    _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerStart, worker_id, os::elapsedTime());
 
     {
       ResourceMark rm;
@@ -4745,10 +4744,11 @@ public:
         double start = os::elapsedTime();
         G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
         evac.do_void();
-        double elapsed_ms = (os::elapsedTime()-start)*1000.0;
-        double term_ms = pss.term_time()*1000.0;
-        _g1h->g1_policy()->phase_times()->add_obj_copy_time(worker_id, elapsed_ms-term_ms);
-        _g1h->g1_policy()->phase_times()->record_termination(worker_id, term_ms, pss.term_attempts());
+        double elapsed_sec = os::elapsedTime() - start;
+        double term_sec = pss.term_time();
+        _g1h->g1_policy()->phase_times()->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_id, elapsed_sec - term_sec);
+        _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::Termination, worker_id, term_sec);
+        _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, pss.term_attempts());
       }
       _g1h->g1_policy()->record_thread_age_table(pss.age_table());
       _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
@@ -4764,9 +4764,7 @@ public:
       // destructors are executed here and are included as part of the
       // "GC Worker Time".
     }
-
-    double end_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->phase_times()->record_gc_worker_end_time(worker_id, end_time_ms);
+    _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerEnd, worker_id, os::elapsedTime());
   }
 };
 
@@ -4828,27 +4826,20 @@ g1_process_roots(OopClosure* scan_non_heap_roots,
   double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds()
       + buf_scan_non_heap_weak_roots.closure_app_seconds();
 
-  g1_policy()->phase_times()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
-
-  double ext_root_time_ms =
-    ((os::elapsedTime() - ext_roots_start) - obj_copy_time_sec) * 1000.0;
+  g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::ObjCopy, worker_i, obj_copy_time_sec);
 
-  g1_policy()->phase_times()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
+  double ext_root_time_sec = os::elapsedTime() - ext_roots_start - obj_copy_time_sec;
+  g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::ExtRootScan, worker_i, ext_root_time_sec);
 
   // During conc marking we have to filter the per-thread SATB buffers
   // to make sure we remove any oops into the CSet (which will show up
   // as implicitly live).
-  double satb_filtering_ms = 0.0;
-  if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers)) {
-    if (mark_in_progress()) {
-      double satb_filter_start = os::elapsedTime();
-
+  {
+    G1GCParPhaseTimesTracker x(g1_policy()->phase_times(), G1GCPhaseTimes::SATBFiltering, worker_i);
+    if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers) && mark_in_progress()) {
       JavaThread::satb_mark_queue_set().filter_thread_buffers();
-
-      satb_filtering_ms = (os::elapsedTime() - satb_filter_start) * 1000.0;
     }
   }
-  g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 
   // Now scan the complement of the collection set.
   G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots);
@@ -5271,7 +5262,8 @@ class G1RedirtyLoggedCardsTask : public AbstractGangTask {
   G1RedirtyLoggedCardsTask(DirtyCardQueueSet* queue) : AbstractGangTask("Redirty Cards"), _queue(queue) { }
 
   virtual void work(uint worker_id) {
-    double start_time = os::elapsedTime();
+    G1GCPhaseTimes* phase_times = G1CollectedHeap::heap()->g1_policy()->phase_times();
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::RedirtyCards, worker_id);
 
     RedirtyLoggedCardTableEntryClosure cl;
     if (G1CollectedHeap::heap()->use_parallel_gc_threads()) {
@@ -5280,9 +5272,7 @@ class G1RedirtyLoggedCardsTask : public AbstractGangTask {
       _queue->apply_closure_to_all_completed_buffers(&cl);
     }
 
-    G1GCPhaseTimes* timer = G1CollectedHeap::heap()->g1_policy()->phase_times();
-    timer->record_redirty_logged_cards_time_ms(worker_id, (os::elapsedTime() - start_time) * 1000.0);
-    timer->record_redirty_logged_cards_processed_cards(worker_id, cl.num_processed());
+    phase_times->record_thread_work_item(G1GCPhaseTimes::RedirtyCards, worker_id, cl.num_processed());
   }
 };
 
@@ -5884,12 +5874,14 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) {
     // reported parallel time.
   }
 
+  G1GCPhaseTimes* phase_times = g1_policy()->phase_times();
+
   double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0;
-  g1_policy()->phase_times()->record_par_time(par_time_ms);
+  phase_times->record_par_time(par_time_ms);
 
   double code_root_fixup_time_ms =
         (os::elapsedTime() - end_par_time_sec) * 1000.0;
-  g1_policy()->phase_times()->record_code_root_fixup_time(code_root_fixup_time_ms);
+  phase_times->record_code_root_fixup_time(code_root_fixup_time_ms);
 
   set_par_threads(0);
 
@@ -5901,9 +5893,14 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) {
   process_discovered_references(n_workers);
 
   if (G1StringDedup::is_enabled()) {
+    double fixup_start = os::elapsedTime();
+
     G1STWIsAliveClosure is_alive(this);
     G1KeepAliveClosure keep_alive(this);
-    G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive);
+    G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive, true, phase_times);
+
+    double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
+    phase_times->record_string_dedup_fixup_time(fixup_time_ms);
   }
 
   _allocator->release_gc_alloc_regions(n_workers, evacuation_info);
diff --git a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
index ab27810c11ceff5967fac973b948be71f6516a5f..ea80c4492d4d7bd16444320501401499277137e5 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
@@ -1084,7 +1084,7 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua
   if (update_stats) {
     double cost_per_card_ms = 0.0;
     if (_pending_cards > 0) {
-      cost_per_card_ms = phase_times()->average_last_update_rs_time() / (double) _pending_cards;
+      cost_per_card_ms = phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) / (double) _pending_cards;
       _cost_per_card_ms_seq->add(cost_per_card_ms);
     }
 
@@ -1092,7 +1092,7 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua
 
     double cost_per_entry_ms = 0.0;
     if (cards_scanned > 10) {
-      cost_per_entry_ms = phase_times()->average_last_scan_rs_time() / (double) cards_scanned;
+      cost_per_entry_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) / (double) cards_scanned;
       if (_last_gc_was_young) {
         _cost_per_entry_ms_seq->add(cost_per_entry_ms);
       } else {
@@ -1134,7 +1134,7 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua
     double cost_per_byte_ms = 0.0;
 
     if (copied_bytes > 0) {
-      cost_per_byte_ms = phase_times()->average_last_obj_copy_time() / (double) copied_bytes;
+      cost_per_byte_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) / (double) copied_bytes;
       if (_in_marking_window) {
         _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
       } else {
@@ -1143,8 +1143,8 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua
     }
 
     double all_other_time_ms = pause_time_ms -
-      (phase_times()->average_last_update_rs_time() + phase_times()->average_last_scan_rs_time()
-      + phase_times()->average_last_obj_copy_time() + phase_times()->average_last_termination_time());
+      (phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) + phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) +
+          phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) + phase_times()->average_time_ms(G1GCPhaseTimes::Termination));
 
     double young_other_time_ms = 0.0;
     if (young_cset_region_length() > 0) {
@@ -1185,8 +1185,8 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua
 
   // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
   double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
-  adjust_concurrent_refinement(phase_times()->average_last_update_rs_time(),
-                               phase_times()->sum_last_update_rs_processed_buffers(), update_rs_time_goal_ms);
+  adjust_concurrent_refinement(phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS),
+                               phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), update_rs_time_goal_ms);
 
   _collectionSetChooser->verify();
 }
@@ -2177,19 +2177,19 @@ void TraceGen0TimeData::record_end_collection(double pause_time_ms, G1GCPhaseTim
     _other.add(pause_time_ms - phase_times->accounted_time_ms());
     _root_region_scan_wait.add(phase_times->root_region_scan_wait_time_ms());
     _parallel.add(phase_times->cur_collection_par_time_ms());
-    _ext_root_scan.add(phase_times->average_last_ext_root_scan_time());
-    _satb_filtering.add(phase_times->average_last_satb_filtering_times_ms());
-    _update_rs.add(phase_times->average_last_update_rs_time());
-    _scan_rs.add(phase_times->average_last_scan_rs_time());
-    _obj_copy.add(phase_times->average_last_obj_copy_time());
-    _termination.add(phase_times->average_last_termination_time());
-
-    double parallel_known_time = phase_times->average_last_ext_root_scan_time() +
-      phase_times->average_last_satb_filtering_times_ms() +
-      phase_times->average_last_update_rs_time() +
-      phase_times->average_last_scan_rs_time() +
-      phase_times->average_last_obj_copy_time() +
-      + phase_times->average_last_termination_time();
+    _ext_root_scan.add(phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan));
+    _satb_filtering.add(phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering));
+    _update_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS));
+    _scan_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::ScanRS));
+    _obj_copy.add(phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy));
+    _termination.add(phase_times->average_time_ms(G1GCPhaseTimes::Termination));
+
+    double parallel_known_time = phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan) +
+      phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering) +
+      phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS) +
+      phase_times->average_time_ms(G1GCPhaseTimes::ScanRS) +
+      phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy) +
+      phase_times->average_time_ms(G1GCPhaseTimes::Termination);
 
     double parallel_other_time = phase_times->cur_collection_par_time_ms() - parallel_known_time;
     _parallel_other.add(parallel_other_time);
diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
index a133510348ed32cc2e6c39a93465d718c50a411e..c5cf47d7079669d716149f1983707ce57711ee7e 100644
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
@@ -22,12 +22,13 @@
  *
  */
 
-
 #include "precompiled.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/os.hpp"
 
 // Helper class for avoiding interleaved logging
 class LineBuffer: public StackObj {
@@ -70,184 +71,243 @@ public:
     va_end(ap);
   }
 
+  void print_cr() {  // Write the buffered text to gclog_or_tty as one line.
+    gclog_or_tty->print_cr("%s", _buffer);
+    _cur = _indent_level * INDENT_CHARS;  // reset _cur to the offset derived from the indent level
+  }
+
   void append_and_print_cr(const char* format, ...)  ATTRIBUTE_PRINTF(2, 3) {
     va_list ap;
     va_start(ap, format);
     vappend(format, ap);
     va_end(ap);
-    gclog_or_tty->print_cr("%s", _buffer);
-    _cur = _indent_level * INDENT_CHARS;
+    print_cr();  // emit the line through the shared print_cr() helper
   }
 };
 
-PRAGMA_DIAG_PUSH
-PRAGMA_FORMAT_NONLITERAL_IGNORED
 template <class T>
-void WorkerDataArray<T>::print(int level, const char* title) {
-  if (_length == 1) {
-    // No need for min, max, average and sum for only one worker
-    LineBuffer buf(level);
-    buf.append("[%s:  ", title);
-    buf.append(_print_format, _data[0]);
-    buf.append_and_print_cr("]");
-    return;
+class WorkerDataArray  : public CHeapObj<mtGC> {
+  friend class G1GCParPhasePrinter;
+  T*          _data;
+  uint        _length;
+  const char* _title;
+  bool        _print_sum;
+  int         _log_level;
+  uint        _indent_level;
+  bool        _enabled;
+
+  WorkerDataArray<size_t>* _thread_work_items;
+
+  NOT_PRODUCT(T uninitialized();)
+
+  // We are caching the sum and average to only have to calculate them once.
+  // This is not done in an MT-safe way. It is intended to allow single
+  // threaded code to call sum() and average() multiple times in any order
+  // without having to worry about the cost.
+  bool   _has_new_data;
+  T      _sum;
+  T      _min;
+  T      _max;
+  double _average;
+
+ public:
+  WorkerDataArray(uint length, const char* title, bool print_sum, int log_level, uint indent_level) :
+    _length(length), _title(title), _print_sum(print_sum), _log_level(log_level), _indent_level(indent_level),
+    _enabled(true), _thread_work_items(NULL), _has_new_data(true) {
+    // One slot per worker. Initializers follow member declaration order; cached totals start out stale.
+    assert(length > 0, "Must have some workers to store data for");
+    _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
   }
 
-  T min = _data[0];
-  T max = _data[0];
-  T sum = 0;
+  ~WorkerDataArray() {
+    FREE_C_HEAP_ARRAY(T, _data, mtGC);
+  }
 
-  LineBuffer buf(level);
-  buf.append("[%s:", title);
-  for (uint i = 0; i < _length; ++i) {
-    T val = _data[i];
-    min = MIN2(val, min);
-    max = MAX2(val, max);
-    sum += val;
-    if (G1Log::finest()) {
-      buf.append("  ");
-      buf.append(_print_format, val);
+  void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items) {  // Attach a per-worker counter array; only the pointer is stored.
+    _thread_work_items = thread_work_items;
+  }
+
+  WorkerDataArray<size_t>* thread_work_items() { return _thread_work_items; }  // Linked counter array, or NULL if none was linked.
+
+  void set(uint worker_i, T value) {  // Record worker_i's value; the asserts reject out-of-range workers and overwriting a slot.
+    assert(worker_i < _length, err_msg("Worker %u is greater than max: %u", worker_i, _length));
+    assert(_data[worker_i] == WorkerDataArray<T>::uninitialized(), err_msg("Overwriting data for worker %u in %s", worker_i, _title));
+    _data[worker_i] = value;
+    _has_new_data = true;  // invalidate the cached sum/min/max/average
+  }
+
+  void set_thread_work_item(uint worker_i, size_t value) {  // Record worker_i's count in the linked work-item array.
+    assert(_thread_work_items != NULL, "No sub count");
+    _thread_work_items->set(worker_i, value);
+  }
+
+  T get(uint worker_i) {  // Return the value stored for worker_i; the asserts require a valid index and a recorded value.
+    assert(worker_i < _length, err_msg("Worker %u is greater than max: %u", worker_i, _length));
+    assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data added for worker %u", worker_i));
+    return _data[worker_i];
+  }
+
+  void add(uint worker_i, T value) {  // Accumulate onto worker_i's existing value; the asserts require one to be present.
+    assert(worker_i < _length, err_msg("Worker %u is greater than max: %u", worker_i, _length));
+    assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data to add to for worker %u", worker_i));
+    _data[worker_i] += value;
+    _has_new_data = true;  // invalidate the cached sum/min/max/average
+  }
+
+  double average(){  // Mean over all _length slots; recomputed only when data changed.
+    calculate_totals();
+    return _average;
+  }
+
+  T sum() {  // Sum over all _length slots; recomputed only when data changed.
+    calculate_totals();
+    return _sum;
+  }
+
+  T minimum() {  // Smallest value over all _length slots; recomputed only when data changed.
+    calculate_totals();
+    return _min;
+  }
+
+  T maximum() {  // Largest value over all _length slots; recomputed only when data changed.
+    calculate_totals();
+    return _max;
+  }
+
+  void reset() PRODUCT_RETURN;
+  void verify() PRODUCT_RETURN;
+
+  void set_enabled(bool enabled) { _enabled = enabled; }  // verify() returns early while this is false.
+
+  int log_level() { return _log_level;  }  // Log level passed to the constructor.
+
+ private:
+
+  void calculate_totals(){  // Refresh the cached sum/min/max/average; a no-op when nothing changed since the last call.
+    if (!_has_new_data) {
+      return;
     }
+
+    _sum = (T)0;
+    _min = _data[0];
+    _max = _min;
+    for (uint i = 0; i < _length; ++i) {  // walks every slot of the array
+      T val = _data[i];
+      _sum += val;
+      _min = MIN2(_min, val);
+      _max = MAX2(_max, val);
+    }
+    _average = (double)_sum / (double)_length;
+    _has_new_data = false;  // cache is valid until the next set()/add()
   }
+};
 
-  if (G1Log::finest()) {
-    buf.append_and_print_cr("%s", "");
-  }
-
-  double avg = (double)sum / (double)_length;
-  buf.append(" Min: ");
-  buf.append(_print_format, min);
-  buf.append(", Avg: ");
-  buf.append("%.1lf", avg); // Always print average as a double
-  buf.append(", Max: ");
-  buf.append(_print_format, max);
-  buf.append(", Diff: ");
-  buf.append(_print_format, max - min);
-  if (_print_sum) {
-    // for things like the start and end times the sum is not
-    // that relevant
-    buf.append(", Sum: ");
-    buf.append(_print_format, sum);
-  }
-  buf.append_and_print_cr("]");
-}
-PRAGMA_DIAG_POP
 
 #ifndef PRODUCT
 
-template <> const int WorkerDataArray<int>::_uninitialized = -1;
-template <> const double WorkerDataArray<double>::_uninitialized = -1.0;
-template <> const size_t WorkerDataArray<size_t>::_uninitialized = (size_t)-1;
+template <>
+size_t WorkerDataArray<size_t>::uninitialized() {  // Sentinel for a slot with no recorded value (non-product builds only).
+  return (size_t)-1;
+}
+
+template <>
+double WorkerDataArray<double>::uninitialized() {  // Sentinel for a slot with no recorded value (non-product builds only).
+  return -1.0;
+}
 
 template <class T>
 void WorkerDataArray<T>::reset() {
   for (uint i = 0; i < _length; i++) {
-    _data[i] = (T)_uninitialized;
+    _data[i] = WorkerDataArray<T>::uninitialized();  // mark every slot as not yet recorded
+  }
+  if (_thread_work_items != NULL) {  // the linked counter array is reset together with this one
+    _thread_work_items->reset();
   }
 }
 
 template <class T>
 void WorkerDataArray<T>::verify() {
+  if (!_enabled) {  // disabled arrays are not checked
+    return;
+  }
+
   for (uint i = 0; i < _length; i++) {
-    assert(_data[i] != _uninitialized,
-        err_msg("Invalid data for worker " UINT32_FORMAT ", data: %lf, uninitialized: %lf",
-            i, (double)_data[i], (double)_uninitialized));
+    assert(_data[i] != WorkerDataArray<T>::uninitialized(),
+        err_msg("Invalid data for worker %u in '%s'", i, _title));
+  }
+  if (_thread_work_items != NULL) {  // the linked counter array is verified as well
+    _thread_work_items->verify();
   }
 }
 
 #endif
 
 G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
-  _max_gc_threads(max_gc_threads),
-  _last_gc_worker_start_times_ms(_max_gc_threads, "%.1lf", false),
-  _last_ext_root_scan_times_ms(_max_gc_threads, "%.1lf"),
-  _last_satb_filtering_times_ms(_max_gc_threads, "%.1lf"),
-  _last_update_rs_times_ms(_max_gc_threads, "%.1lf"),
-  _last_update_rs_processed_buffers(_max_gc_threads, "%d"),
-  _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"),
-  _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"),
-  _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"),
-  _last_termination_times_ms(_max_gc_threads, "%.1lf"),
-  _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
-  _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false),
-  _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"),
-  _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf"),
-  _last_redirty_logged_cards_time_ms(_max_gc_threads, "%.1lf"),
-  _last_redirty_logged_cards_processed_cards(_max_gc_threads, SIZE_FORMAT),
-  _cur_string_dedup_queue_fixup_worker_times_ms(_max_gc_threads, "%.1lf"),
-  _cur_string_dedup_table_fixup_worker_times_ms(_max_gc_threads, "%.1lf")
+  _max_gc_threads(max_gc_threads)
 {
   assert(max_gc_threads > 0, "Must have some GC threads");
+  // One array per parallel phase, sized for max_gc_threads; arguments: title, print_sum, log level, indent level.
+  _gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Start (ms)", false, G1Log::LevelFiner, 2);
+  _gc_par_phases[ExtRootScan] = new WorkerDataArray<double>(max_gc_threads, "Ext Root Scanning (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[SATBFiltering] = new WorkerDataArray<double>(max_gc_threads, "SATB Filtering (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[UpdateRS] = new WorkerDataArray<double>(max_gc_threads, "Update RS (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[ScanRS] = new WorkerDataArray<double>(max_gc_threads, "Scan RS (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[CodeRoots] = new WorkerDataArray<double>(max_gc_threads, "Code Root Scanning (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[ObjCopy] = new WorkerDataArray<double>(max_gc_threads, "Object Copy (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[Termination] = new WorkerDataArray<double>(max_gc_threads, "Termination (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[GCWorkerTotal] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Total (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[GCWorkerEnd] = new WorkerDataArray<double>(max_gc_threads, "GC Worker End (ms)", false, G1Log::LevelFiner, 2);
+  _gc_par_phases[Other] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Other (ms)", true, G1Log::LevelFiner, 2);
+  // Update RS also carries a per-worker count of processed buffers, linked as its thread work items.
+  _update_rs_processed_buffers = new WorkerDataArray<size_t>(max_gc_threads, "Processed Buffers", true, G1Log::LevelFiner, 3);
+  _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers);
+  // Termination also carries a per-worker count of termination attempts.
+  _termination_attempts = new WorkerDataArray<size_t>(max_gc_threads, "Termination Attempts", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[Termination]->link_thread_work_items(_termination_attempts);
+
+  _gc_par_phases[StringDedupQueueFixup] = new WorkerDataArray<double>(max_gc_threads, "Queue Fixup (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[StringDedupTableFixup] = new WorkerDataArray<double>(max_gc_threads, "Table Fixup (ms)", true, G1Log::LevelFiner, 2);
+  // Redirty Cards also carries a per-worker count of redirtied cards.
+  _gc_par_phases[RedirtyCards] = new WorkerDataArray<double>(max_gc_threads, "Parallel Redirty", true, G1Log::LevelFinest, 3);
+  _redirtied_cards = new WorkerDataArray<size_t>(max_gc_threads, "Redirtied Cards", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[RedirtyCards]->link_thread_work_items(_redirtied_cards);
 }
 
-void G1GCPhaseTimes::note_gc_start(uint active_gc_threads) {
+void G1GCPhaseTimes::note_gc_start(uint active_gc_threads, bool mark_in_progress) {  // Reset all phase arrays and set which optional phases are enabled.
   assert(active_gc_threads > 0, "The number of threads must be > 0");
-  assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max nubmer of threads");
+  assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max number of threads");
   _active_gc_threads = active_gc_threads;
 
-  _last_gc_worker_start_times_ms.reset();
-  _last_ext_root_scan_times_ms.reset();
-  _last_satb_filtering_times_ms.reset();
-  _last_update_rs_times_ms.reset();
-  _last_update_rs_processed_buffers.reset();
-  _last_scan_rs_times_ms.reset();
-  _last_strong_code_root_scan_times_ms.reset();
-  _last_obj_copy_times_ms.reset();
-  _last_termination_times_ms.reset();
-  _last_termination_attempts.reset();
-  _last_gc_worker_end_times_ms.reset();
-  _last_gc_worker_times_ms.reset();
-  _last_gc_worker_other_times_ms.reset();
-
-  _last_redirty_logged_cards_time_ms.reset();
-  _last_redirty_logged_cards_processed_cards.reset();
+  for (int i = 0; i < GCParPhasesSentinel; i++) {
+    _gc_par_phases[i]->reset();  // reset() also resets any linked thread work items
+  }
 
+  _gc_par_phases[SATBFiltering]->set_enabled(mark_in_progress);  // SATB filtering is enabled only when mark_in_progress is true
+  // The string dedup fixup phases are enabled only when string deduplication is enabled.
+  _gc_par_phases[StringDedupQueueFixup]->set_enabled(G1StringDedup::is_enabled());
+  _gc_par_phases[StringDedupTableFixup]->set_enabled(G1StringDedup::is_enabled());
 }
 
 void G1GCPhaseTimes::note_gc_end() {
-  _last_gc_worker_start_times_ms.verify();
-  _last_ext_root_scan_times_ms.verify();
-  _last_satb_filtering_times_ms.verify();
-  _last_update_rs_times_ms.verify();
-  _last_update_rs_processed_buffers.verify();
-  _last_scan_rs_times_ms.verify();
-  _last_strong_code_root_scan_times_ms.verify();
-  _last_obj_copy_times_ms.verify();
-  _last_termination_times_ms.verify();
-  _last_termination_attempts.verify();
-  _last_gc_worker_end_times_ms.verify();
-
   for (uint i = 0; i < _active_gc_threads; i++) {
-    double worker_time = _last_gc_worker_end_times_ms.get(i) - _last_gc_worker_start_times_ms.get(i);
-    _last_gc_worker_times_ms.set(i, worker_time);
-
-    double worker_known_time = _last_ext_root_scan_times_ms.get(i) +
-                               _last_satb_filtering_times_ms.get(i) +
-                               _last_update_rs_times_ms.get(i) +
-                               _last_scan_rs_times_ms.get(i) +
-                               _last_strong_code_root_scan_times_ms.get(i) +
-                               _last_obj_copy_times_ms.get(i) +
-                               _last_termination_times_ms.get(i);
-
-    double worker_other_time = worker_time - worker_known_time;
-    _last_gc_worker_other_times_ms.set(i, worker_other_time);
+    double worker_time = _gc_par_phases[GCWorkerEnd]->get(i) - _gc_par_phases[GCWorkerStart]->get(i);  // total time for worker i
+    record_time_secs(GCWorkerTotal, i , worker_time);
+    // Sum of the individually recorded phases; whatever is left over is recorded as Other.
+    double worker_known_time =
+        _gc_par_phases[ExtRootScan]->get(i) +
+        _gc_par_phases[SATBFiltering]->get(i) +
+        _gc_par_phases[UpdateRS]->get(i) +
+        _gc_par_phases[ScanRS]->get(i) +
+        _gc_par_phases[CodeRoots]->get(i) +
+        _gc_par_phases[ObjCopy]->get(i) +
+        _gc_par_phases[Termination]->get(i);
+
+    record_time_secs(Other, i, worker_time - worker_known_time);
   }
 
-  _last_gc_worker_times_ms.verify();
-  _last_gc_worker_other_times_ms.verify();
-
-  _last_redirty_logged_cards_time_ms.verify();
-  _last_redirty_logged_cards_processed_cards.verify();
-}
-
-void G1GCPhaseTimes::note_string_dedup_fixup_start() {
-  _cur_string_dedup_queue_fixup_worker_times_ms.reset();
-  _cur_string_dedup_table_fixup_worker_times_ms.reset();
-}
-
-void G1GCPhaseTimes::note_string_dedup_fixup_end() {
-  _cur_string_dedup_queue_fixup_worker_times_ms.verify();
-  _cur_string_dedup_table_fixup_worker_times_ms.verify();
+  for (int i = 0; i < GCParPhasesSentinel; i++) {
+    _gc_par_phases[i]->verify();  // verify() is a no-op for phases that were disabled
+  }
 }
 
 void G1GCPhaseTimes::print_stats(int level, const char* str, double value) {
@@ -259,7 +319,7 @@ void G1GCPhaseTimes::print_stats(int level, const char* str, size_t value) {
 }
 
 void G1GCPhaseTimes::print_stats(int level, const char* str, double value, uint workers) {
-  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: " UINT32_FORMAT "]", str, value, workers);
+  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: %u]", str, value, workers);  // one line: label, value in ms, worker count
 }
 
 double G1GCPhaseTimes::accounted_time_ms() {
@@ -287,46 +347,172 @@ double G1GCPhaseTimes::accounted_time_ms() {
     return misc_time_ms;
 }
 
-void G1GCPhaseTimes::print(double pause_time_sec) {
-  if (_root_region_scan_wait_time_ms > 0.0) {
-    print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
+// Record the time, in seconds, that worker_i spent in the given phase.
+void G1GCPhaseTimes::record_time_secs(GCParPhases phase, uint worker_i, double secs) {
+  _gc_par_phases[phase]->set(worker_i, secs);  // kept in seconds; the *_time_ms() getters convert to ms
+}
+
+// Add secs (a number of seconds) to worker_i's time for the given phase.
+void G1GCPhaseTimes::add_time_secs(GCParPhases phase, uint worker_i, double secs) {
+  _gc_par_phases[phase]->add(worker_i, secs);
+}
+
+void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count) {
+  _gc_par_phases[phase]->set_thread_work_item(worker_i, count);  // worker_i's work item count for this phase (a count, not a time)
+}
+
+// Return the average time for a phase in milliseconds.
+double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->average() * 1000.0;  // recorded in seconds -> ms
+}
+
+double G1GCPhaseTimes::get_time_ms(GCParPhases phase, uint worker_i) {
+  return _gc_par_phases[phase]->get(worker_i) * 1000.0;  // worker_i's time for the phase, recorded in seconds -> ms
+}
+
+double G1GCPhaseTimes::sum_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->sum() * 1000.0;  // sum of the phase's recorded times, seconds -> ms
+}
+
+double G1GCPhaseTimes::min_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->minimum() * 1000.0;  // minimum of the phase's recorded times, seconds -> ms
+}
+
+double G1GCPhaseTimes::max_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->maximum() * 1000.0;  // maximum of the phase's recorded times, seconds -> ms
+}
+
+size_t G1GCPhaseTimes::get_thread_work_item(GCParPhases phase, uint worker_i) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");  // only valid for phases that have a work item array
+  return _gc_par_phases[phase]->thread_work_items()->get(worker_i);  // worker_i's work item count
+}
+
+size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");  // only valid for phases that have a work item array
+  return _gc_par_phases[phase]->thread_work_items()->sum();  // sum of the phase's work item counts
+}
+
+double G1GCPhaseTimes::average_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");  // only valid for phases that have a work item array
+  return _gc_par_phases[phase]->thread_work_items()->average();  // returned as double although the items are size_t counts
+}
+
+size_t G1GCPhaseTimes::min_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");  // only valid for phases that have a work item array
+  return _gc_par_phases[phase]->thread_work_items()->minimum();  // minimum of the phase's work item counts
+}
+
+size_t G1GCPhaseTimes::max_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");  // only valid for phases that have a work item array
+  return _gc_par_phases[phase]->thread_work_items()->maximum();  // maximum of the phase's work item counts
+}
+
+class G1GCParPhasePrinter : public StackObj {
+  G1GCPhaseTimes* _phase_times;
+ public:
+  G1GCParPhasePrinter(G1GCPhaseTimes* phase_times) : _phase_times(phase_times) {}
+
+  // Print the statistics recorded for phase_id. Nothing is printed when the
+  // phase is disabled or its log level is above the current G1Log level.
+  void print(G1GCPhaseTimes::GCParPhases phase_id) {
+    WorkerDataArray<double>* data = _phase_times->_gc_par_phases[phase_id];
+    const bool hidden = !data->_enabled || data->_log_level > G1Log::level();
+    if (hidden) return;
+
+    if (data->_length != 1) {
+      print_multi_length(phase_id, data);
+      return;
+    }
+    print_single_length(phase_id, data);
+  }
+
+ private:
+
+  void print_single_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
+    // Single-entry phase: print just its time in ms; min, max, average and sum add nothing for only one worker
+    LineBuffer buf(phase->_indent_level);
+    buf.append_and_print_cr("[%s:  %.1lf]", phase->_title, _phase_times->get_time_ms(phase_id, 0));
+    // Work item total, if the phase has one. SIZE_FORMAT is set off by spaces so C++11 does not lex it as a literal suffix.
+    if (phase->_thread_work_items != NULL) {
+      LineBuffer buf2(phase->_thread_work_items->_indent_level);
+      buf2.append_and_print_cr("[%s:  " SIZE_FORMAT "]", phase->_thread_work_items->_title, _phase_times->sum_thread_work_items(phase_id));
+    }
+  }
+
+  void print_time_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
+    for (uint i = 0; i < phase->_length; ++i) {  // one value per entry of the phase's array
+      buf.append("  %.1lf", _phase_times->get_time_ms(phase_id, i));  // entry i's time in ms, one decimal
+    }
+    buf.print_cr();  // emit the line; the caller has already appended the "[title:" prefix to buf
+  }
+
+  void print_count_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) {
+    for (uint i = 0; i < thread_work_items->_length; ++i) {  // one value per entry of the work item array
+      buf.append("  " SIZE_FORMAT, _phase_times->get_thread_work_item(phase_id, i));  // entry i's work item count
+    }
+    buf.print_cr();  // emit the line; the caller has already appended the "[title:" prefix to buf
   }
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
-    _last_gc_worker_start_times_ms.print(2, "GC Worker Start (ms)");
-    _last_ext_root_scan_times_ms.print(2, "Ext Root Scanning (ms)");
-    if (_last_satb_filtering_times_ms.sum() > 0.0) {
-      _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
+
+  void print_thread_work_items(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) {
+    LineBuffer buf(thread_work_items->_indent_level);
+    buf.append("[%s:", thread_work_items->_title);
+
+    if (G1Log::finest()) {
+      print_count_values(buf, phase_id, thread_work_items);
     }
-    _last_update_rs_times_ms.print(2, "Update RS (ms)");
-      _last_update_rs_processed_buffers.print(3, "Processed Buffers");
-    _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
-    _last_strong_code_root_scan_times_ms.print(2, "Code Root Scanning (ms)");
-    _last_obj_copy_times_ms.print(2, "Object Copy (ms)");
-    _last_termination_times_ms.print(2, "Termination (ms)");
+
+    assert(thread_work_items->_print_sum, err_msg("%s does not have print sum true even though it is a count", thread_work_items->_title));
+
+    buf.append_and_print_cr(" Min: " SIZE_FORMAT ", Avg: %.1lf, Max: " SIZE_FORMAT ", Diff: " SIZE_FORMAT ", Sum: " SIZE_FORMAT "]",
+        _phase_times->min_thread_work_items(phase_id), _phase_times->average_thread_work_items(phase_id), _phase_times->max_thread_work_items(phase_id),
+        _phase_times->max_thread_work_items(phase_id) - _phase_times->min_thread_work_items(phase_id), _phase_times->sum_thread_work_items(phase_id));
+  }
+
+  void print_multi_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
+    LineBuffer buf(phase->_indent_level);
+    buf.append("[%s:", phase->_title);
+
     if (G1Log::finest()) {
-      _last_termination_attempts.print(3, "Termination Attempts");
+      print_time_values(buf, phase_id, phase);
     }
-    _last_gc_worker_other_times_ms.print(2, "GC Worker Other (ms)");
-    _last_gc_worker_times_ms.print(2, "GC Worker Total (ms)");
-    _last_gc_worker_end_times_ms.print(2, "GC Worker End (ms)");
-  } else {
-    _last_ext_root_scan_times_ms.print(1, "Ext Root Scanning (ms)");
-    if (_last_satb_filtering_times_ms.sum() > 0.0) {
-      _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)");
+
+    buf.append(" Min: %.1lf, Avg: %.1lf, Max: %.1lf, Diff: %.1lf",
+        _phase_times->min_time_ms(phase_id), _phase_times->average_time_ms(phase_id), _phase_times->max_time_ms(phase_id),
+        _phase_times->max_time_ms(phase_id) - _phase_times->min_time_ms(phase_id));
+
+    if (phase->_print_sum) {
+      // for things like the start and end times the sum is not
+      // that relevant
+      buf.append(", Sum: %.1lf", _phase_times->sum_time_ms(phase_id));
+    }
+
+    buf.append_and_print_cr("]");
+
+    if (phase->_thread_work_items != NULL) {
+      print_thread_work_items(phase_id, phase->_thread_work_items);
     }
-    _last_update_rs_times_ms.print(1, "Update RS (ms)");
-      _last_update_rs_processed_buffers.print(2, "Processed Buffers");
-    _last_scan_rs_times_ms.print(1, "Scan RS (ms)");
-    _last_strong_code_root_scan_times_ms.print(1, "Code Root Scanning (ms)");
-    _last_obj_copy_times_ms.print(1, "Object Copy (ms)");
   }
+};
+
+void G1GCPhaseTimes::print(double pause_time_sec) {
+  G1GCParPhasePrinter par_phase_printer(this);
+
+  if (_root_region_scan_wait_time_ms > 0.0) {
+    print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
+  }
+
+  print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
+  for (int i = 0; i <= GCMainParPhasesLast; i++) {
+    par_phase_printer.print((GCParPhases) i);
+  }
+
   print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
   print_stats(1, "Code Root Purge", _cur_strong_code_root_purge_time_ms);
   if (G1StringDedup::is_enabled()) {
     print_stats(1, "String Dedup Fixup", _cur_string_dedup_fixup_time_ms, _active_gc_threads);
-    _cur_string_dedup_queue_fixup_worker_times_ms.print(2, "Queue Fixup (ms)");
-    _cur_string_dedup_table_fixup_worker_times_ms.print(2, "Table Fixup (ms)");
+    for (int i = StringDedupPhasesFirst; i <= StringDedupPhasesLast; i++) {
+      par_phase_printer.print((GCParPhases) i);
+    }
   }
   print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
   double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms();
@@ -350,10 +536,7 @@ void G1GCPhaseTimes::print(double pause_time_sec) {
   print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
   print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
   print_stats(2, "Redirty Cards", _recorded_redirty_logged_cards_time_ms);
-  if (G1Log::finest()) {
-    _last_redirty_logged_cards_time_ms.print(3, "Parallel Redirty");
-    _last_redirty_logged_cards_processed_cards.print(3, "Redirtied Cards");
-  }
+  par_phase_printer.print(RedirtyCards);
   if (G1ReclaimDeadHumongousObjectsAtYoungGC) {
     print_stats(2, "Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms);
     if (G1Log::finest()) {
@@ -373,3 +556,17 @@ void G1GCPhaseTimes::print(double pause_time_sec) {
     print_stats(2, "Verify After", _cur_verify_after_time_ms);
   }
 }
+
+G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id) :
+    _start_time(0.0), _phase(phase), _phase_times(phase_times), _worker_id(worker_id) {  // listed in member declaration order
+  if (_phase_times != NULL) {  // a NULL phase_times means nothing is measured or recorded
+    _start_time = os::elapsedTime();  // seconds; the destructor records the difference to this
+  }
+}
+
+G1GCParPhaseTimesTracker::~G1GCParPhaseTimesTracker() {
+  // A tracker created without phase times records nothing.
+  if (_phase_times == NULL) return;
+  _phase_times->record_time_secs(_phase, _worker_id, os::elapsedTime() - _start_time);
+}
+
diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
index 8421eb07b6ab3784aa2931e742a3e51dc9e7b207..a0b012ca8ec4f271baf43d7389c6dfd0e9c15c2b 100644
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
@@ -26,106 +26,46 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
 
 #include "memory/allocation.hpp"
-#include "gc_interface/gcCause.hpp"
 
-template <class T>
-class WorkerDataArray  : public CHeapObj<mtGC> {
-  T*          _data;
-  uint        _length;
-  const char* _print_format;
-  bool        _print_sum;
+class LineBuffer;
 
-  NOT_PRODUCT(static const T _uninitialized;)
-
-  // We are caching the sum and average to only have to calculate them once.
-  // This is not done in an MT-safe way. It is intended to allow single
-  // threaded code to call sum() and average() multiple times in any order
-  // without having to worry about the cost.
-  bool   _has_new_data;
-  T      _sum;
-  double _average;
-
- public:
-  WorkerDataArray(uint length, const char* print_format, bool print_sum = true) :
-  _length(length), _print_format(print_format), _print_sum(print_sum), _has_new_data(true) {
-    assert(length > 0, "Must have some workers to store data for");
-    _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
-  }
-
-  ~WorkerDataArray() {
-    FREE_C_HEAP_ARRAY(T, _data, mtGC);
-  }
-
-  void set(uint worker_i, T value) {
-    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
-    assert(_data[worker_i] == (T)-1, err_msg("Overwriting data for worker %d", worker_i));
-    _data[worker_i] = value;
-    _has_new_data = true;
-  }
-
-  T get(uint worker_i) {
-    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
-    assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i));
-    return _data[worker_i];
-  }
-
-  void add(uint worker_i, T value) {
-    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
-    assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i));
-    _data[worker_i] += value;
-    _has_new_data = true;
-  }
-
-  double average(){
-    if (_has_new_data) {
-      calculate_totals();
-    }
-    return _average;
-  }
-
-  T sum() {
-    if (_has_new_data) {
-      calculate_totals();
-    }
-    return _sum;
-  }
-
-  void print(int level, const char* title);
-
-  void reset() PRODUCT_RETURN;
-  void verify() PRODUCT_RETURN;
-
- private:
-
-  void calculate_totals(){
-    _sum = (T)0;
-    for (uint i = 0; i < _length; ++i) {
-      _sum += _data[i];
-    }
-    _average = (double)_sum / (double)_length;
-    _has_new_data = false;
-  }
-};
+template <class T> class WorkerDataArray;
 
 class G1GCPhaseTimes : public CHeapObj<mtGC> {
+  friend class G1GCParPhasePrinter;
 
- private:
   uint _active_gc_threads;
   uint _max_gc_threads;
 
-  WorkerDataArray<double> _last_gc_worker_start_times_ms;
-  WorkerDataArray<double> _last_ext_root_scan_times_ms;
-  WorkerDataArray<double> _last_satb_filtering_times_ms;
-  WorkerDataArray<double> _last_update_rs_times_ms;
-  WorkerDataArray<int>    _last_update_rs_processed_buffers;
-  WorkerDataArray<double> _last_scan_rs_times_ms;
-  WorkerDataArray<double> _last_strong_code_root_scan_times_ms;
-  WorkerDataArray<double> _last_obj_copy_times_ms;
-  WorkerDataArray<double> _last_termination_times_ms;
-  WorkerDataArray<size_t> _last_termination_attempts;
-  WorkerDataArray<double> _last_gc_worker_end_times_ms;
-  WorkerDataArray<double> _last_gc_worker_times_ms;
-  WorkerDataArray<double> _last_gc_worker_other_times_ms;
+ public:
+  enum GCParPhases {          // index into _gc_par_phases; the order matters for the range markers declared below
+    GCWorkerStart,
+    ExtRootScan,
+    SATBFiltering,
+    UpdateRS,
+    ScanRS,
+    CodeRoots,
+    ObjCopy,
+    Termination,
+    Other,                    // GCWorkerTotal minus the phases ExtRootScan..Termination (computed in the .cpp)
+    GCWorkerTotal,            // GCWorkerEnd - GCWorkerStart (computed in the .cpp)
+    GCWorkerEnd,              // last phase printed under "Parallel Time"
+    StringDedupQueueFixup,    // printed under "String Dedup Fixup"
+    StringDedupTableFixup,    // printed under "String Dedup Fixup"
+    RedirtyCards,
+    GCParPhasesSentinel       // number of phases (sizes _gc_par_phases); not a phase itself
+  };
+
+ private:
+  // Markers for grouping the phases in the GCParPhases enum above
+  static const int GCMainParPhasesLast = GCWorkerEnd;
+  static const int StringDedupPhasesFirst = StringDedupQueueFixup;
+  static const int StringDedupPhasesLast = StringDedupTableFixup;
+
+  WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel];
+  WorkerDataArray<size_t>* _update_rs_processed_buffers;
+  WorkerDataArray<size_t>* _termination_attempts;
+  WorkerDataArray<size_t>* _redirtied_cards;
 
   double _cur_collection_par_time_ms;
   double _cur_collection_code_root_fixup_time_ms;
@@ -135,9 +75,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
   double _cur_evac_fail_restore_remsets;
   double _cur_evac_fail_remove_self_forwards;
 
-  double                  _cur_string_dedup_fixup_time_ms;
-  WorkerDataArray<double> _cur_string_dedup_queue_fixup_worker_times_ms;
-  WorkerDataArray<double> _cur_string_dedup_table_fixup_worker_times_ms;
+  double _cur_string_dedup_fixup_time_ms;
 
   double _cur_clear_ct_time_ms;
   double _cur_ref_proc_time_ms;
@@ -149,8 +87,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
   double _recorded_young_cset_choice_time_ms;
   double _recorded_non_young_cset_choice_time_ms;
 
-  WorkerDataArray<double> _last_redirty_logged_cards_time_ms;
-  WorkerDataArray<size_t> _last_redirty_logged_cards_processed_cards;
   double _recorded_redirty_logged_cards_time_ms;
 
   double _recorded_young_free_cset_time_ms;
@@ -171,54 +107,34 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
 
  public:
   G1GCPhaseTimes(uint max_gc_threads);
-  void note_gc_start(uint active_gc_threads);
+  void note_gc_start(uint active_gc_threads, bool mark_in_progress);
   void note_gc_end();
   void print(double pause_time_sec);
 
-  void record_gc_worker_start_time(uint worker_i, double ms) {
-    _last_gc_worker_start_times_ms.set(worker_i, ms);
-  }
-
-  void record_ext_root_scan_time(uint worker_i, double ms) {
-    _last_ext_root_scan_times_ms.set(worker_i, ms);
-  }
-
-  void record_satb_filtering_time(uint worker_i, double ms) {
-    _last_satb_filtering_times_ms.set(worker_i, ms);
-  }
-
-  void record_update_rs_time(uint worker_i, double ms) {
-    _last_update_rs_times_ms.set(worker_i, ms);
-  }
-
-  void record_update_rs_processed_buffers(uint worker_i, int processed_buffers) {
-    _last_update_rs_processed_buffers.set(worker_i, processed_buffers);
-  }
+  // record the time a phase took in seconds
+  void record_time_secs(GCParPhases phase, uint worker_i, double secs);
 
-  void record_scan_rs_time(uint worker_i, double ms) {
-    _last_scan_rs_times_ms.set(worker_i, ms);
-  }
+  // add a number of seconds to a phase
+  void add_time_secs(GCParPhases phase, uint worker_i, double secs);
 
-  void record_strong_code_root_scan_time(uint worker_i, double ms) {
-    _last_strong_code_root_scan_times_ms.set(worker_i, ms);
-  }
+  void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count);
 
-  void record_obj_copy_time(uint worker_i, double ms) {
-    _last_obj_copy_times_ms.set(worker_i, ms);
-  }
+  // return the average time for a phase in milliseconds
+  double average_time_ms(GCParPhases phase);
 
-  void add_obj_copy_time(uint worker_i, double ms) {
-    _last_obj_copy_times_ms.add(worker_i, ms);
-  }
+  size_t sum_thread_work_items(GCParPhases phase);
 
-  void record_termination(uint worker_i, double ms, size_t attempts) {
-    _last_termination_times_ms.set(worker_i, ms);
-    _last_termination_attempts.set(worker_i, attempts);
-  }
+ private:
+  double get_time_ms(GCParPhases phase, uint worker_i);
+  double sum_time_ms(GCParPhases phase);
+  double min_time_ms(GCParPhases phase);
+  double max_time_ms(GCParPhases phase);
+  size_t get_thread_work_item(GCParPhases phase, uint worker_i);
+  double average_thread_work_items(GCParPhases phase);
+  size_t min_thread_work_items(GCParPhases phase);
+  size_t max_thread_work_items(GCParPhases phase);
 
-  void record_gc_worker_end_time(uint worker_i, double ms) {
-    _last_gc_worker_end_times_ms.set(worker_i, ms);
-  }
+ public:
 
   void record_clear_ct_time(double ms) {
     _cur_clear_ct_time_ms = ms;
@@ -248,21 +164,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
     _cur_evac_fail_remove_self_forwards = ms;
   }
 
-  void note_string_dedup_fixup_start();
-  void note_string_dedup_fixup_end();
-
   void record_string_dedup_fixup_time(double ms) {
     _cur_string_dedup_fixup_time_ms = ms;
   }
 
-  void record_string_dedup_queue_fixup_worker_time(uint worker_id, double ms) {
-    _cur_string_dedup_queue_fixup_worker_times_ms.set(worker_id, ms);
-  }
-
-  void record_string_dedup_table_fixup_worker_time(uint worker_id, double ms) {
-    _cur_string_dedup_table_fixup_worker_times_ms.set(worker_id, ms);
-  }
-
   void record_ref_proc_time(double ms) {
     _cur_ref_proc_time_ms = ms;
   }
@@ -301,14 +206,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
     _recorded_non_young_cset_choice_time_ms = time_ms;
   }
 
-  void record_redirty_logged_cards_time_ms(uint worker_i, double time_ms) {
-    _last_redirty_logged_cards_time_ms.set(worker_i, time_ms);
-  }
-
-  void record_redirty_logged_cards_processed_cards(uint worker_i, size_t processed_buffers) {
-    _last_redirty_logged_cards_processed_cards.set(worker_i, processed_buffers);
-  }
-
   void record_redirty_logged_cards_time_ms(double time_ms) {
     _recorded_redirty_logged_cards_time_ms = time_ms;
   }
@@ -362,38 +259,16 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
   double fast_reclaim_humongous_time_ms() {
     return _cur_fast_reclaim_humongous_time_ms;
   }
+};
 
-  double average_last_update_rs_time() {
-    return _last_update_rs_times_ms.average();
-  }
-
-  int sum_last_update_rs_processed_buffers() {
-    return _last_update_rs_processed_buffers.sum();
-  }
-
-  double average_last_scan_rs_time(){
-    return _last_scan_rs_times_ms.average();
-  }
-
-  double average_last_strong_code_root_scan_time(){
-    return _last_strong_code_root_scan_times_ms.average();
-  }
-
-  double average_last_obj_copy_time() {
-    return _last_obj_copy_times_ms.average();
-  }
-
-  double average_last_termination_time() {
-    return _last_termination_times_ms.average();
-  }
-
-  double average_last_ext_root_scan_time() {
-    return _last_ext_root_scan_times_ms.average();
-  }
-
-  double average_last_satb_filtering_times_ms() {
-    return _last_satb_filtering_times_ms.average();
-  }
+class G1GCParPhaseTimesTracker : public StackObj {  // times its own lifetime and records it for one phase and worker
+  double _start_time;                  // os::elapsedTime() taken at construction when _phase_times != NULL
+  G1GCPhaseTimes::GCParPhases _phase;  // phase the elapsed time is recorded under
+  G1GCPhaseTimes* _phase_times;        // receives the measurement in the destructor; NULL disables tracking
+  uint _worker_id;                     // worker the elapsed time is recorded for
+public:
+  G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id);
+  ~G1GCParPhaseTimesTracker();         // records elapsed seconds via record_time_secs()
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
diff --git a/src/share/vm/gc_implementation/g1/g1Log.hpp b/src/share/vm/gc_implementation/g1/g1Log.hpp
index b8da001cfd627c5e7a87152a38a282f841efcfd4..6f72c8fbc8e43efb5e4dd7ee6c8c0b6294e74545 100644
--- a/src/share/vm/gc_implementation/g1/g1Log.hpp
+++ b/src/share/vm/gc_implementation/g1/g1Log.hpp
@@ -28,6 +28,7 @@
 #include "memory/allocation.hpp"
 
 class G1Log : public AllStatic {
+ public:
   typedef enum {
     LevelNone,
     LevelFine,
@@ -35,6 +36,7 @@ class G1Log : public AllStatic {
     LevelFinest
   } LogLevel;
 
+ private:
   static LogLevel _level;
 
  public:
@@ -50,6 +52,10 @@ class G1Log : public AllStatic {
     return _level == LevelFinest;
   }
 
+  static LogLevel level() {
+    return _level;
+  }
+
   static void init();
 };
 
diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/src/share/vm/gc_implementation/g1/g1RemSet.cpp
index c3342638b17b469ad5566df9b453b5baf06cdd9c..6b2e847df44472ba2828a63bf3d85454b987f3a1 100644
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp
@@ -255,9 +255,8 @@ void G1RemSet::scanRS(G1ParPushHeapRSClosure* oc,
   assert(_cards_scanned != NULL, "invariant");
   _cards_scanned[worker_i] = scanRScl.cards_done();
 
-  _g1p->phase_times()->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
-  _g1p->phase_times()->record_strong_code_root_scan_time(worker_i,
-                                                         scanRScl.strong_code_root_scan_time_sec() * 1000.0);
+  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
+  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, scanRScl.strong_code_root_scan_time_sec());
 }
 
 // Closure used for updating RSets and recording references that
@@ -294,22 +293,11 @@ public:
 };
 
 void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i) {
-  double start = os::elapsedTime();
+  G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i);
   // Apply the given closure to all remaining log entries.
   RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);
 
   _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i);
-
-  // Now there should be no dirty cards.
-  if (G1RSLogCheckCardTable) {
-    CountNonCleanMemRegionClosure cl(_g1);
-    _ct_bs->mod_card_iterate(&cl);
-    // XXX This isn't true any more: keeping cards of young regions
-    // marked dirty broke it.  Need some reasonable fix.
-    guarantee(cl.n() == 0, "Card table should be clean.");
-  }
-
-  _g1p->phase_times()->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
 }
 
 void G1RemSet::cleanupHRRS() {
diff --git a/src/share/vm/gc_implementation/g1/g1StringDedup.cpp b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp
index d353d7ebd2d06ff76307923963d93bb691c6aeeb..bb960ee3a8bc00b83ab44433028c96bc3cfdf6f4 100644
--- a/src/share/vm/gc_implementation/g1/g1StringDedup.cpp
+++ b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp
@@ -105,7 +105,7 @@ void G1StringDedup::deduplicate(oop java_string) {
 
 void G1StringDedup::oops_do(OopClosure* keep_alive) {
   assert(is_enabled(), "String deduplication not enabled");
-  unlink_or_oops_do(NULL, keep_alive);
+  unlink_or_oops_do(NULL, keep_alive, true /* allow_resize_and_rehash */);
 }
 
 void G1StringDedup::unlink(BoolObjectClosure* is_alive) {
@@ -122,37 +122,35 @@ void G1StringDedup::unlink(BoolObjectClosure* is_alive) {
 class G1StringDedupUnlinkOrOopsDoTask : public AbstractGangTask {
 private:
   G1StringDedupUnlinkOrOopsDoClosure _cl;
+  G1GCPhaseTimes* _phase_times;
 
 public:
   G1StringDedupUnlinkOrOopsDoTask(BoolObjectClosure* is_alive,
                                   OopClosure* keep_alive,
-                                  bool allow_resize_and_rehash) :
+                                  bool allow_resize_and_rehash,
+                                  G1GCPhaseTimes* phase_times) :
     AbstractGangTask("G1StringDedupUnlinkOrOopsDoTask"),
-    _cl(is_alive, keep_alive, allow_resize_and_rehash) {
-  }
+    _cl(is_alive, keep_alive, allow_resize_and_rehash), _phase_times(phase_times) { }
 
   virtual void work(uint worker_id) {
-    double queue_fixup_start = os::elapsedTime();
-    G1StringDedupQueue::unlink_or_oops_do(&_cl);
-
-    double table_fixup_start = os::elapsedTime();
-    G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id);
-
-    double queue_fixup_time_ms = (table_fixup_start - queue_fixup_start) * 1000.0;
-    double table_fixup_time_ms = (os::elapsedTime() - table_fixup_start) * 1000.0;
-    G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
-    g1p->phase_times()->record_string_dedup_queue_fixup_worker_time(worker_id, queue_fixup_time_ms);
-    g1p->phase_times()->record_string_dedup_table_fixup_worker_time(worker_id, table_fixup_time_ms);
+    {
+      G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupQueueFixup, worker_id);
+      G1StringDedupQueue::unlink_or_oops_do(&_cl);
+    }
+    {
+      G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupTableFixup, worker_id);
+      G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id);
+    }
   }
 };
 
-void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, bool allow_resize_and_rehash) {
+void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive,
+                                      OopClosure* keep_alive,
+                                      bool allow_resize_and_rehash,
+                                      G1GCPhaseTimes* phase_times) {
   assert(is_enabled(), "String deduplication not enabled");
-  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
-  g1p->phase_times()->note_string_dedup_fixup_start();
-  double fixup_start = os::elapsedTime();
 
-  G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash);
+  G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash, phase_times);
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     G1CollectedHeap* g1h = G1CollectedHeap::heap();
     g1h->set_par_threads();
@@ -161,10 +159,6 @@ void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* k
   } else {
     task.work(0);
   }
-
-  double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
-  g1p->phase_times()->record_string_dedup_fixup_time(fixup_time_ms);
-  g1p->phase_times()->note_string_dedup_fixup_end();
 }
 
 void G1StringDedup::threads_do(ThreadClosure* tc) {
diff --git a/src/share/vm/gc_implementation/g1/g1StringDedup.hpp b/src/share/vm/gc_implementation/g1/g1StringDedup.hpp
index 68f700f6585ed4c6ccdb43949635a8adf70dfd08..3792a667ad232f2ed581582222e0b100bc3dd92e 100644
--- a/src/share/vm/gc_implementation/g1/g1StringDedup.hpp
+++ b/src/share/vm/gc_implementation/g1/g1StringDedup.hpp
@@ -90,6 +90,7 @@ class BoolObjectClosure;
 class ThreadClosure;
 class outputStream;
 class G1StringDedupTable;
+class G1GCPhaseTimes;
 
 //
 // Main interface for interacting with string deduplication.
@@ -130,7 +131,7 @@ public:
   static void oops_do(OopClosure* keep_alive);
   static void unlink(BoolObjectClosure* is_alive);
   static void unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive,
-                                bool allow_resize_and_rehash = true);
+                                bool allow_resize_and_rehash, G1GCPhaseTimes* phase_times = NULL);
 
   static void threads_do(ThreadClosure* tc);
   static void print_worker_threads_on(outputStream* st);