Commit e44f2e1d authored by: R redestad

8069273: Decrease Hot Card Cache Lock contention

Reviewed-by: tschatzl, mgerdin
Parent commit: a8639db7
/* /*
* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -36,11 +36,10 @@ void G1HotCardCache::initialize(G1RegionToSpaceMapper* card_counts_storage) { ...@@ -36,11 +36,10 @@ void G1HotCardCache::initialize(G1RegionToSpaceMapper* card_counts_storage) {
if (default_use_cache()) { if (default_use_cache()) {
_use_cache = true; _use_cache = true;
_hot_cache_size = (1 << G1ConcRSLogCacheSize); _hot_cache_size = (size_t)1 << G1ConcRSLogCacheSize;
_hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC); _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC);
_n_hot = 0; reset_hot_cache_internal();
_hot_cache_idx = 0;
// For refining the cards in the hot cache in parallel // For refining the cards in the hot cache in parallel
_hot_cache_par_chunk_size = (int)(ParallelGCThreads > 0 ? ClaimChunkSize : _hot_cache_size); _hot_cache_par_chunk_size = (int)(ParallelGCThreads > 0 ? ClaimChunkSize : _hot_cache_size);
...@@ -64,26 +63,21 @@ jbyte* G1HotCardCache::insert(jbyte* card_ptr) { ...@@ -64,26 +63,21 @@ jbyte* G1HotCardCache::insert(jbyte* card_ptr) {
// return it for immediate refining. // return it for immediate refining.
return card_ptr; return card_ptr;
} }
// Otherwise, the card is hot. // Otherwise, the card is hot.
jbyte* res = NULL; size_t index = Atomic::add_ptr((intptr_t)1, (volatile intptr_t*)&_hot_cache_idx) - 1;
MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); size_t masked_index = index & (_hot_cache_size - 1);
if (_n_hot == _hot_cache_size) { jbyte* current_ptr = _hot_cache[masked_index];
res = _hot_cache[_hot_cache_idx];
_n_hot--; // Try to store the new card pointer into the cache. Compare-and-swap to guard
} // against the unlikely event of a race resulting in another card pointer to
// have already been written to the cache. In this case we will return
// Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. // card_ptr in favor of the other option, which would be starting over. This
_hot_cache[_hot_cache_idx] = card_ptr; // should be OK since card_ptr will likely be the older card already when/if
_hot_cache_idx++; // this ever happens.
jbyte* previous_ptr = (jbyte*)Atomic::cmpxchg_ptr(card_ptr,
if (_hot_cache_idx == _hot_cache_size) { &_hot_cache[masked_index],
// Wrap around current_ptr);
_hot_cache_idx = 0; return (previous_ptr == current_ptr) ? previous_ptr : card_ptr;
}
_n_hot++;
return res;
} }
void G1HotCardCache::drain(uint worker_i, void G1HotCardCache::drain(uint worker_i,
...@@ -96,38 +90,37 @@ void G1HotCardCache::drain(uint worker_i, ...@@ -96,38 +90,37 @@ void G1HotCardCache::drain(uint worker_i,
assert(_hot_cache != NULL, "Logic"); assert(_hot_cache != NULL, "Logic");
assert(!use_cache(), "cache should be disabled"); assert(!use_cache(), "cache should be disabled");
int start_idx; while (_hot_cache_par_claimed_idx < _hot_cache_size) {
size_t end_idx = Atomic::add_ptr((intptr_t)_hot_cache_par_chunk_size,
while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once (volatile intptr_t*)&_hot_cache_par_claimed_idx);
int end_idx = start_idx + _hot_cache_par_chunk_size; size_t start_idx = end_idx - _hot_cache_par_chunk_size;
// The current worker has successfully claimed the chunk [start_idx..end_idx)
if (start_idx == end_idx = MIN2(end_idx, _hot_cache_size);
Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { for (size_t i = start_idx; i < end_idx; i++) {
// The current worker has successfully claimed the chunk [start_idx..end_idx) jbyte* card_ptr = _hot_cache[i];
end_idx = MIN2(end_idx, _n_hot); if (card_ptr != NULL) {
for (int i = start_idx; i < end_idx; i++) { if (g1rs->refine_card(card_ptr, worker_i, true)) {
jbyte* card_ptr = _hot_cache[i]; // The part of the heap spanned by the card contains references
if (card_ptr != NULL) { // that point into the current collection set.
if (g1rs->refine_card(card_ptr, worker_i, true)) { // We need to record the card pointer in the DirtyCardQueueSet
// The part of the heap spanned by the card contains references // that we use for such cards.
// that point into the current collection set. //
// We need to record the card pointer in the DirtyCardQueueSet // The only time we care about recording cards that contain
// that we use for such cards. // references that point into the collection set is during
// // RSet updating while within an evacuation pause.
// The only time we care about recording cards that contain // In this case worker_i should be the id of a GC worker thread
// references that point into the collection set is during assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
// RSet updating while within an evacuation pause. assert(worker_i < ParallelGCThreads,
// In this case worker_i should be the id of a GC worker thread err_msg("incorrect worker id: %u", worker_i));
assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
assert(worker_i < (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), into_cset_dcq->enqueue(card_ptr);
err_msg("incorrect worker id: "UINT32_FORMAT, worker_i));
into_cset_dcq->enqueue(card_ptr);
}
} }
} else {
break;
} }
} }
} }
// The existing entries in the hot card cache, which were just refined // The existing entries in the hot card cache, which were just refined
// above, are discarded prior to re-enabling the cache near the end of the GC. // above, are discarded prior to re-enabling the cache near the end of the GC.
} }
......
/* /*
* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -54,21 +54,30 @@ class HeapRegion; ...@@ -54,21 +54,30 @@ class HeapRegion;
// code, increasing throughput. // code, increasing throughput.
class G1HotCardCache: public CHeapObj<mtGC> { class G1HotCardCache: public CHeapObj<mtGC> {
G1CollectedHeap* _g1h;
G1CollectedHeap* _g1h;
bool _use_cache;
G1CardCounts _card_counts;
// The card cache table // The card cache table
jbyte** _hot_cache; jbyte** _hot_cache;
size_t _hot_cache_size;
int _hot_cache_size; int _hot_cache_par_chunk_size;
int _n_hot;
int _hot_cache_idx;
int _hot_cache_par_chunk_size; // Avoids false sharing when concurrently updating _hot_cache_idx or
volatile int _hot_cache_par_claimed_idx; // _hot_cache_par_claimed_idx. These are never updated at the same time
// thus it's not necessary to separate them as well
char _pad_before[DEFAULT_CACHE_LINE_SIZE];
bool _use_cache; volatile size_t _hot_cache_idx;
G1CardCounts _card_counts; volatile size_t _hot_cache_par_claimed_idx;
char _pad_after[DEFAULT_CACHE_LINE_SIZE];
// The number of cached cards a thread claims when flushing the cache // The number of cached cards a thread claims when flushing the cache
static const int ClaimChunkSize = 32; static const int ClaimChunkSize = 32;
...@@ -113,16 +122,25 @@ class G1HotCardCache: public CHeapObj<mtGC> { ...@@ -113,16 +122,25 @@ class G1HotCardCache: public CHeapObj<mtGC> {
void reset_hot_cache() { void reset_hot_cache() {
assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread"); assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread");
_hot_cache_idx = 0; _n_hot = 0; if (default_use_cache()) {
reset_hot_cache_internal();
}
} }
bool hot_cache_is_empty() { return _n_hot == 0; }
// Zeros the values in the card counts table for entire committed heap // Zeros the values in the card counts table for entire committed heap
void reset_card_counts(); void reset_card_counts();
// Zeros the values in the card counts table for the given region // Zeros the values in the card counts table for the given region
void reset_card_counts(HeapRegion* hr); void reset_card_counts(HeapRegion* hr);
private:
void reset_hot_cache_internal() {
assert(_hot_cache != NULL, "Logic");
_hot_cache_idx = 0;
for (size_t i = 0; i < _hot_cache_size; i++) {
_hot_cache[i] = NULL;
}
}
}; };
#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1HOTCARDCACHE_HPP #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1HOTCARDCACHE_HPP
/* /*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -119,7 +119,6 @@ Monitor* SecondaryFreeList_lock = NULL; ...@@ -119,7 +119,6 @@ Monitor* SecondaryFreeList_lock = NULL;
Mutex* OldSets_lock = NULL; Mutex* OldSets_lock = NULL;
Monitor* RootRegionScan_lock = NULL; Monitor* RootRegionScan_lock = NULL;
Mutex* MMUTracker_lock = NULL; Mutex* MMUTracker_lock = NULL;
Mutex* HotCardCache_lock = NULL;
Monitor* GCTaskManager_lock = NULL; Monitor* GCTaskManager_lock = NULL;
...@@ -200,7 +199,6 @@ void mutex_init() { ...@@ -200,7 +199,6 @@ void mutex_init() {
def(OldSets_lock , Mutex , leaf , true ); def(OldSets_lock , Mutex , leaf , true );
def(RootRegionScan_lock , Monitor, leaf , true ); def(RootRegionScan_lock , Monitor, leaf , true );
def(MMUTracker_lock , Mutex , leaf , true ); def(MMUTracker_lock , Mutex , leaf , true );
def(HotCardCache_lock , Mutex , special , true );
def(EvacFailureStack_lock , Mutex , nonleaf , true ); def(EvacFailureStack_lock , Mutex , nonleaf , true );
def(StringDedupQueue_lock , Monitor, leaf, true ); def(StringDedupQueue_lock , Monitor, leaf, true );
......
/* /*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -137,7 +137,6 @@ extern Mutex* OldSets_lock; // protects the old region sets ...@@ -137,7 +137,6 @@ extern Mutex* OldSets_lock; // protects the old region sets
extern Monitor* RootRegionScan_lock; // used to notify that the CM threads have finished scanning the IM snapshot regions extern Monitor* RootRegionScan_lock; // used to notify that the CM threads have finished scanning the IM snapshot regions
extern Mutex* MMUTracker_lock; // protects the MMU extern Mutex* MMUTracker_lock; // protects the MMU
// tracker data structures // tracker data structures
extern Mutex* HotCardCache_lock; // protects the hot card cache
extern Mutex* Management_lock; // a lock used to serialize JVM management extern Mutex* Management_lock; // a lock used to serialize JVM management
extern Monitor* Service_lock; // a lock used for service thread operation extern Monitor* Service_lock; // a lock used for service thread operation
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To post a comment, please register