WriteBufferManager's dummy entry size to block cache 1MB -> 256KB (#5175)

Summary: Dummy cache size of 1MB is too large for small block sizes. Our GetDefaultCacheShardBits() uses min_shard_size = 512L * 1024L to determine the number of shards, so a 1MB dummy entry exceeds the size of a whole shard and makes the cache exceed its budget. Change it to 256KB accordingly. There shouldn't be any obvious performance impact, since inserting a cache entry every 256KB of memtable inserts is still infrequent enough. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5175 Differential Revision: D14954289 Pulled By: siying fbshipit-source-id: 2c275255c1ac3992174e06529e44c55538325c94

WriteBufferManager's dummy entry size to block cache 1MB -> 256KB (#5175)
Summary: Dummy cache size of 1MB is too large for small block sizes. Our GetDefaultCacheShardBits() uses min_shard_size = 512L * 1024L to determine the number of shards, so a 1MB dummy entry exceeds the size of a whole shard and makes the cache exceed its budget. Change it to 256KB accordingly. There shouldn't be any obvious performance impact, since inserting a cache entry every 256KB of memtable inserts is still infrequent enough. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5175 Differential Revision: D14954289 Pulled By: siying fbshipit-source-id: 2c275255c1ac3992174e06529e44c55538325c94
beb44ec3 · Siying Dong · Facebook Github Bot · f1239d5f · beb44ec3 · beb44ec3
4 changed files
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -15,6 +15,7 @@
 * Fix a bug in Encryption Env which could cause encrypted files to be read beyond file boundaries.
 * Fix a race condition between WritePrepared::Get and ::Put with duplicate keys.
 * Fix crash when memtable prefix bloom is enabled and read/write a key out of domain of prefix extractor.
+* Adjust WriteBufferManager's dummy entry size to block cache from 1MB to 256KB.


 ## 6.1.0 (3/27/2019)

--- a/db/db_test2.cc
+++ b/db/db_test2.cc
@@ -200,7 +200,7 @@ TEST_P(DBTestSharedWriteBufferAcrossCFs, SharedWriteBufferAcrossCFs) {

  // The total soft write buffer size is about 105000
  std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
-  ASSERT_LT(cache->GetUsage(), 1024 * 1024);
+  ASSERT_LT(cache->GetUsage(), 256 * 1024);

  if (use_old_interface_) {
    options.db_write_buffer_size = 120000;  // this is the real limit
@@ -236,14 +236,14 @@ TEST_P(DBTestSharedWriteBufferAcrossCFs, SharedWriteBufferAcrossCFs) {

  ASSERT_OK(Put(3, Key(1), DummyString(30000), wo));
  if (cost_cache_) {
-    ASSERT_GE(cache->GetUsage(), 1024 * 1024);
-    ASSERT_LE(cache->GetUsage(), 2 * 1024 * 1024);
+    ASSERT_GE(cache->GetUsage(), 256 * 1024);
+    ASSERT_LE(cache->GetUsage(), 2 * 256 * 1024);
  }
  wait_flush();
  ASSERT_OK(Put(0, Key(1), DummyString(60000), wo));
  if (cost_cache_) {
-    ASSERT_GE(cache->GetUsage(), 1024 * 1024);
-    ASSERT_LE(cache->GetUsage(), 2 * 1024 * 1024);
+    ASSERT_GE(cache->GetUsage(), 256 * 1024);
+    ASSERT_LE(cache->GetUsage(), 2 * 256 * 1024);
  }
  wait_flush();
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
@@ -339,11 +339,11 @@ TEST_P(DBTestSharedWriteBufferAcrossCFs, SharedWriteBufferAcrossCFs) {
              static_cast<uint64_t>(2));
  }
  if (cost_cache_) {
-    ASSERT_GE(cache->GetUsage(), 1024 * 1024);
+    ASSERT_GE(cache->GetUsage(), 256 * 1024);
    Close();
    options.write_buffer_manager.reset();
    last_options_.write_buffer_manager.reset();
-    ASSERT_LT(cache->GetUsage(), 1024 * 1024);
+    ASSERT_LT(cache->GetUsage(), 256 * 1024);
  }
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
 }
@@ -467,8 +467,8 @@ TEST_F(DBTest2, TestWriteBufferNoLimitWithCache) {
  Reopen(options);

  ASSERT_OK(Put("foo", "bar"));
-  // One dummy entry is 1MB.
-  ASSERT_GT(cache->GetUsage(), 500000);
+  // One dummy entry is 256KB.
+  ASSERT_GT(cache->GetUsage(), 128000);
 }

 namespace {

--- a/memtable/write_buffer_manager.cc
+++ b/memtable/write_buffer_manager.cc
@@ -14,7 +14,7 @@
 namespace rocksdb {
 #ifndef ROCKSDB_LITE
 namespace {
-const size_t kSizeDummyEntry = 1024 * 1024;
+const size_t kSizeDummyEntry = 256 * 1024;
 // The key will be longer than keys for blocks in SST files so they won't
 // conflict.
 const size_t kCacheKeyPrefix = kMaxVarint64Length * 4 + 1;
@@ -86,7 +86,7 @@ void WriteBufferManager::ReserveMemWithCache(size_t mem) {
  size_t new_mem_used = memory_used_.load(std::memory_order_relaxed) + mem;
  memory_used_.store(new_mem_used, std::memory_order_relaxed);
  while (new_mem_used > cache_rep_->cache_allocated_size_) {
-    // Expand size by at least 1MB.
+    // Expand size by at least 256KB.
    // Add a dummy record to the cache
    Cache::Handle* handle;
    cache_rep_->cache_->Insert(cache_rep_->GetNextCacheKey(), nullptr,

--- a/memtable/write_buffer_manager_test.cc
+++ b/memtable/write_buffer_manager_test.cc
@@ -57,67 +57,67 @@ TEST_F(WriteBufferManagerTest, CacheCost) {
  std::unique_ptr<WriteBufferManager> wbf(
      new WriteBufferManager(50 * 1024 * 1024, cache));

-  // Allocate 1.5MB will allocate 2MB
-  wbf->ReserveMem(1536 * 1024);
-  ASSERT_GE(cache->GetPinnedUsage(), 2 * 1024 * 1024);
-  ASSERT_LT(cache->GetPinnedUsage(), 2 * 1024 * 1024 + 10000);
+  // Allocate 333KB will allocate 512KB
+  wbf->ReserveMem(333 * 1024);
+  ASSERT_GE(cache->GetPinnedUsage(), 2 * 256 * 1024);
+  ASSERT_LT(cache->GetPinnedUsage(), 2 * 256 * 1024 + 10000);

-  // Allocate another 2MB
-  wbf->ReserveMem(2 * 1024 * 1024);
-  ASSERT_GE(cache->GetPinnedUsage(), 4 * 1024 * 1024);
-  ASSERT_LT(cache->GetPinnedUsage(), 4 * 1024 * 1024 + 10000);
+  // Allocate another 512KB
+  wbf->ReserveMem(512 * 1024);
+  ASSERT_GE(cache->GetPinnedUsage(), 4 * 256 * 1024);
+  ASSERT_LT(cache->GetPinnedUsage(), 4 * 256 * 1024 + 10000);

-  // Allocate another 20MB
-  wbf->ReserveMem(20 * 1024 * 1024);
-  ASSERT_GE(cache->GetPinnedUsage(), 24 * 1024 * 1024);
-  ASSERT_LT(cache->GetPinnedUsage(), 24 * 1024 * 1024 + 10000);
+  // Allocate another 10MB
+  wbf->ReserveMem(10 * 1024 * 1024);
+  ASSERT_GE(cache->GetPinnedUsage(), 11 * 1024 * 1024);
+  ASSERT_LT(cache->GetPinnedUsage(), 11 * 1024 * 1024 + 10000);

-  // Free 2MB will not cause any change in cache cost
-  wbf->FreeMem(2 * 1024 * 1024);
-  ASSERT_GE(cache->GetPinnedUsage(), 24 * 1024 * 1024);
-  ASSERT_LT(cache->GetPinnedUsage(), 24 * 1024 * 1024 + 10000);
+  // Free 1MB will not cause any change in cache cost
+  wbf->FreeMem(1024 * 1024);
+  ASSERT_GE(cache->GetPinnedUsage(), 11 * 1024 * 1024);
+  ASSERT_LT(cache->GetPinnedUsage(), 11 * 1024 * 1024 + 10000);

  ASSERT_FALSE(wbf->ShouldFlush());

-  // Allocate another 30MB
-  wbf->ReserveMem(30 * 1024 * 1024);
-  ASSERT_GE(cache->GetPinnedUsage(), 52 * 1024 * 1024);
-  ASSERT_LT(cache->GetPinnedUsage(), 52 * 1024 * 1024 + 10000);
+  // Allocate another 41MB
+  wbf->ReserveMem(41 * 1024 * 1024);
+  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024);
+  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000);
  ASSERT_TRUE(wbf->ShouldFlush());

  ASSERT_TRUE(wbf->ShouldFlush());

  wbf->ScheduleFreeMem(20 * 1024 * 1024);
-  ASSERT_GE(cache->GetPinnedUsage(), 52 * 1024 * 1024);
-  ASSERT_LT(cache->GetPinnedUsage(), 52 * 1024 * 1024 + 10000);
+  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024);
+  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000);

  // Still need flush as the hard limit hits
  ASSERT_TRUE(wbf->ShouldFlush());

-  // Free 20MB will releae 1MB from cache
+  // Free 20MB will releae 256KB from cache
  wbf->FreeMem(20 * 1024 * 1024);
-  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024);
-  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000);
+  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 256 * 1024);
+  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 256 * 1024 + 10000);

  ASSERT_FALSE(wbf->ShouldFlush());

-  // Every free will release 1MB if still not hit 3/4
+  // Every free will release 256KB if still not hit 3/4
  wbf->FreeMem(16 * 1024);
-  ASSERT_GE(cache->GetPinnedUsage(), 50 * 1024 * 1024);
-  ASSERT_LT(cache->GetPinnedUsage(), 50 * 1024 * 1024 + 10000);
+  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 2 * 256 * 1024);
+  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 2 * 256 * 1024 + 10000);

  wbf->FreeMem(16 * 1024);
-  ASSERT_GE(cache->GetPinnedUsage(), 49 * 1024 * 1024);
-  ASSERT_LT(cache->GetPinnedUsage(), 49 * 1024 * 1024 + 10000);
+  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024);
+  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024 + 10000);

-  // Free 2MB will not cause any change in cache cost
-  wbf->ReserveMem(2 * 1024 * 1024);
-  ASSERT_GE(cache->GetPinnedUsage(), 49 * 1024 * 1024);
-  ASSERT_LT(cache->GetPinnedUsage(), 49 * 1024 * 1024 + 10000);
+  // Reserve 512KB will not cause any change in cache cost
+  wbf->ReserveMem(512 * 1024);
+  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024);
+  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024 + 10000);

  wbf->FreeMem(16 * 1024);
-  ASSERT_GE(cache->GetPinnedUsage(), 48 * 1024 * 1024);
-  ASSERT_LT(cache->GetPinnedUsage(), 48 * 1024 * 1024 + 10000);
+  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 4 * 256 * 1024);
+  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 4 * 256 * 1024 + 10000);

  // Destory write buffer manger should free everything
  wbf.reset();
@@ -136,8 +136,8 @@ TEST_F(WriteBufferManagerTest, NoCapCacheCost) {
  ASSERT_FALSE(wbf->ShouldFlush());

  wbf->FreeMem(9 * 1024 * 1024);
-  for (int i = 0; i < 10; i++) {
-    wbf->FreeMem(16 * 1024);
+  for (int i = 0; i < 40; i++) {
+    wbf->FreeMem(4 * 1024);
  }
  ASSERT_GE(cache->GetPinnedUsage(), 1024 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 1024 * 1024 + 10000);