提交 d3420464 编写于 作者: P Peter Dillinger 提交者: Facebook GitHub Bot

cache_bench enhancements for jemalloc etc. (#11758)

Summary:
* Add some options to cache_bench to use JemallocNodumpAllocator
* Make num_shard_bits option use and report cache-specific defaults
* Add a usleep option to sleep between operations, for simulating a workload with more CPU idle/wait time.
* Use const& for JemallocAllocatorOptions, to improve API usability (e.g. can bind to temporary `{}`)
* InstallStackTraceHandler()

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11758

Test Plan: manual

Reviewed By: jowlyzhang

Differential Revision: D48668479

Pulled By: pdillinger

fbshipit-source-id: b6032fbe09444cdb8f1443a5e017d2eea4f6205a
上级 6353c6e2
......@@ -3,7 +3,6 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache_key.h"
#ifdef GFLAGS
#include <cinttypes>
#include <cstddef>
......@@ -13,9 +12,12 @@
#include <set>
#include <sstream>
#include "cache/cache_key.h"
#include "cache/sharded_cache.h"
#include "db/db_impl/db_impl.h"
#include "monitoring/histogram.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/advanced_cache.h"
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"
......@@ -44,7 +46,8 @@ static constexpr uint64_t GiB = MiB << 10;
DEFINE_uint32(threads, 16, "Number of concurrent threads to run.");
DEFINE_uint64(cache_size, 1 * GiB,
"Number of bytes to use as a cache of uncompressed data.");
DEFINE_uint32(num_shard_bits, 6, "shard_bits.");
DEFINE_int32(num_shard_bits, -1,
"ShardedCacheOptions::shard_bits. Default = auto");
DEFINE_double(resident_ratio, 0.25,
"Ratio of keys fitting in cache to keyspace.");
......@@ -76,6 +79,8 @@ DEFINE_uint32(
DEFINE_uint32(gather_stats_entries_per_lock, 256,
"For Cache::ApplyToAllEntries");
// Max per-op sleep in microseconds; each op sleeps a uniformly random
// duration in [0, usleep], simulating CPU idle/wait time in a workload.
// 0 (the default) disables sleeping entirely.
DEFINE_uint32(usleep, 0, "Sleep up to this many microseconds after each op.");
// When set, skips the filler-data computation around cache ops so the
// benchmark measures cache operations as purely as possible.
DEFINE_bool(lean, false,
"If true, no additional computation is performed besides cache "
"operations.");
......@@ -97,6 +102,17 @@ static class std::shared_ptr<ROCKSDB_NAMESPACE::SecondaryCache> secondary_cache;
// Which block-cache implementation to benchmark (e.g. "lru_cache",
// "fixed_hyper_clock_cache"); unsupported values abort at startup.
DEFINE_string(cache_type, "lru_cache", "Type of block cache.");
// Route cache value allocations through JemallocNodumpAllocator instead of
// plain new/delete (requires jemalloc support in the build — TODO confirm
// failure mode when unsupported).
DEFINE_bool(use_jemalloc_no_dump_allocator, false,
"Whether to use JemallocNoDumpAllocator");
// Defaults below are taken from a default-constructed
// JemallocAllocatorOptions so they automatically track library defaults.
DEFINE_uint32(jemalloc_no_dump_allocator_num_arenas,
ROCKSDB_NAMESPACE::JemallocAllocatorOptions().num_arenas,
"JemallocNodumpAllocator::num_arenas");
DEFINE_bool(jemalloc_no_dump_allocator_limit_tcache_size,
ROCKSDB_NAMESPACE::JemallocAllocatorOptions().limit_tcache_size,
"JemallocNodumpAllocator::limit_tcache_size");
// ## BEGIN stress_cache_key sub-tool options ##
// See class StressCacheKey below.
DEFINE_bool(stress_cache_key, false,
......@@ -239,8 +255,8 @@ struct KeyGen {
}
};
Cache::ObjectPtr createValue(Random64& rnd) {
char* rv = new char[FLAGS_value_bytes];
Cache::ObjectPtr createValue(Random64& rnd, MemoryAllocator* alloc) {
char* rv = AllocateBlock(FLAGS_value_bytes, alloc).release();
// Fill with some filler data, and take some CPU time
for (uint32_t i = 0; i < FLAGS_value_bytes; i += 8) {
EncodeFixed64(rv + i, rnd.Next());
......@@ -266,8 +282,8 @@ Status CreateFn(const Slice& data, Cache::CreateContext* /*context*/,
return Status::OK();
};
void DeleteFn(Cache::ObjectPtr value, MemoryAllocator* /*alloc*/) {
delete[] static_cast<char*>(value);
// Deleter for cache values created by createValue: frees the value through
// CustomDeleter bound to the cache's MemoryAllocator, mirroring the
// AllocateBlock call used at creation (presumably CustomDeleter falls back
// to delete[] when alloc is null — confirm against memory_allocator.h).
void DeleteFn(Cache::ObjectPtr value, MemoryAllocator* alloc) {
CustomDeleter{alloc}(static_cast<char*>(value));
}
Cache::CacheItemHelper helper1_wos(CacheEntryRole::kDataBlock, DeleteFn);
......@@ -302,6 +318,15 @@ class CacheBench {
exit(1);
}
std::shared_ptr<MemoryAllocator> allocator;
if (FLAGS_use_jemalloc_no_dump_allocator) {
JemallocAllocatorOptions opts;
opts.num_arenas = FLAGS_jemalloc_no_dump_allocator_num_arenas;
opts.limit_tcache_size =
FLAGS_jemalloc_no_dump_allocator_limit_tcache_size;
Status s = NewJemallocNodumpAllocator(opts, &allocator);
assert(s.ok());
}
if (FLAGS_cache_type == "clock_cache") {
fprintf(stderr, "Old clock cache implementation has been removed.\n");
exit(1);
......@@ -309,6 +334,7 @@ class CacheBench {
HyperClockCacheOptions opts(
FLAGS_cache_size, /*estimated_entry_charge=*/0, FLAGS_num_shard_bits);
opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX);
opts.memory_allocator = allocator;
if (FLAGS_cache_type == "fixed_hyper_clock_cache" ||
FLAGS_cache_type == "hyper_clock_cache") {
opts.estimated_entry_charge = FLAGS_value_bytes_estimate > 0
......@@ -319,7 +345,7 @@ class CacheBench {
opts.min_avg_entry_charge = FLAGS_value_bytes_estimate;
}
} else {
fprintf(stderr, "Cache type not supported.");
fprintf(stderr, "Cache type not supported.\n");
exit(1);
}
cache_ = opts.MakeSharedCache();
......@@ -328,6 +354,7 @@ class CacheBench {
false /* strict_capacity_limit */,
0.5 /* high_pri_pool_ratio */);
opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX);
opts.memory_allocator = allocator;
if (!FLAGS_secondary_cache_uri.empty()) {
Status s = SecondaryCache::CreateFromString(
ConfigOptions(), FLAGS_secondary_cache_uri, &secondary_cache);
......@@ -343,7 +370,7 @@ class CacheBench {
cache_ = NewLRUCache(opts);
} else {
fprintf(stderr, "Cache type not supported.");
fprintf(stderr, "Cache type not supported.\n");
exit(1);
}
}
......@@ -373,7 +400,8 @@ class CacheBench {
keys_since_last_not_found = 0;
Status s =
cache_->Insert(key, createValue(rnd), &helper1, FLAGS_value_bytes);
cache_->Insert(key, createValue(rnd, cache_->memory_allocator()),
&helper1, FLAGS_value_bytes);
assert(s.ok());
handle = cache_->Lookup(key);
......@@ -610,6 +638,7 @@ class CacheBench {
const auto clock = SystemClock::Default().get();
uint64_t start_time = clock->NowMicros();
StopWatchNano timer(clock);
auto system_clock = SystemClock::Default();
for (uint64_t i = 0; i < FLAGS_ops_per_thread; i++) {
Slice key = gen.GetRand(thread->rnd, max_key_, FLAGS_skew);
......@@ -637,8 +666,9 @@ class CacheBench {
} else {
++lookup_misses;
// do insert
Status s = cache_->Insert(key, createValue(thread->rnd), &helper2,
FLAGS_value_bytes, &handle);
Status s = cache_->Insert(
key, createValue(thread->rnd, cache_->memory_allocator()),
&helper2, FLAGS_value_bytes, &handle);
assert(s.ok());
}
} else if (random_op < insert_threshold_) {
......@@ -647,8 +677,9 @@ class CacheBench {
handle = nullptr;
}
// do insert
Status s = cache_->Insert(key, createValue(thread->rnd), &helper3,
FLAGS_value_bytes, &handle);
Status s = cache_->Insert(
key, createValue(thread->rnd, cache_->memory_allocator()), &helper3,
FLAGS_value_bytes, &handle);
assert(s.ok());
} else if (random_op < lookup_threshold_) {
if (handle) {
......@@ -679,6 +710,13 @@ class CacheBench {
thread->latency_ns_hist.Add(timer.ElapsedNanos());
}
thread->shared->AddLookupStats(lookup_hits, lookup_misses);
if (FLAGS_usleep > 0) {
unsigned us =
static_cast<unsigned>(thread->rnd.Uniform(FLAGS_usleep + 1));
if (us > 0) {
system_clock->SleepForMicroseconds(us);
}
}
}
if (FLAGS_early_exit) {
MutexLock l(thread->shared->GetMutex());
......@@ -712,7 +750,9 @@ class CacheBench {
printf("Ops per thread : %" PRIu64 "\n", FLAGS_ops_per_thread);
printf("Cache size : %s\n",
BytesToHumanString(FLAGS_cache_size).c_str());
printf("Num shard bits : %u\n", FLAGS_num_shard_bits);
printf("Num shard bits : %d\n",
static_cast_with_check<ShardedCacheBase>(cache_.get())
->GetNumShardBits());
printf("Max key : %" PRIu64 "\n", max_key_);
printf("Resident ratio : %g\n", FLAGS_resident_ratio);
printf("Skew degree : %u\n", FLAGS_skew);
......@@ -1032,6 +1072,7 @@ class StressCacheKey {
};
int cache_bench_tool(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_stress_cache_key) {
......
......@@ -81,7 +81,7 @@ struct JemallocAllocatorOptions {
// The tcache normally incurs 0.5M extra memory usage per-thread. The usage
// can be reduced by limiting allocation sizes to cache.
extern Status NewJemallocNodumpAllocator(
JemallocAllocatorOptions& options,
const JemallocAllocatorOptions& options,
std::shared_ptr<MemoryAllocator>* memory_allocator);
} // namespace ROCKSDB_NAMESPACE
......@@ -63,7 +63,7 @@ bool JemallocNodumpAllocator::IsSupported(std::string* why) {
}
JemallocNodumpAllocator::JemallocNodumpAllocator(
JemallocAllocatorOptions& options)
const JemallocAllocatorOptions& options)
: options_(options)
#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
,
......@@ -283,7 +283,7 @@ void JemallocNodumpAllocator::DestroyThreadSpecificCache(void* ptr) {
#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
Status NewJemallocNodumpAllocator(
JemallocAllocatorOptions& options,
const JemallocAllocatorOptions& options,
std::shared_ptr<MemoryAllocator>* memory_allocator) {
if (memory_allocator == nullptr) {
return Status::InvalidArgument("memory_allocator must be non-null.");
......
......@@ -30,7 +30,7 @@ namespace ROCKSDB_NAMESPACE {
// arena mutexes.
class JemallocNodumpAllocator : public BaseMemoryAllocator {
public:
explicit JemallocNodumpAllocator(JemallocAllocatorOptions& options);
explicit JemallocNodumpAllocator(const JemallocAllocatorOptions& options);
#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
~JemallocNodumpAllocator();
#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册