Option to cache index/filter blocks with priority

Summary: Add an option to the block-based table to insert index/filter blocks into the block cache with high priority. Combined with an LRUCache configured with high_pri_pool_ratio, we can reserve space for index/filter blocks, making them less likely to be evicted. Depends on D61977. Test Plan: See unit test. Reviewers: lightmark, IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, march, leveldb Differential Revision: https://reviews.facebook.net/D62241

Option to cache index/filter blocks with priority
Summary: Add an option to the block-based table to insert index/filter blocks into the block cache with high priority. Combined with an LRUCache configured with high_pri_pool_ratio, we can reserve space for index/filter blocks, making them less likely to be evicted. Depends on D61977. Test Plan: See unit test. Reviewers: lightmark, IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, march, leveldb Differential Revision: https://reviews.facebook.net/D62241
4a16c32e · Yi Wu · 99c4af71 · 4a16c32e · 4a16c32e · 4a16c32e
7 changed file
--- a/db/db_block_cache_test.cc
+++ b/db/db_block_cache_test.cc
@@ -9,6 +9,7 @@
 #include <cstdlib>
 #include "db/db_test_util.h"
 #include "port/stack_trace.h"
+#include "util/lru_cache.h"

 namespace rocksdb {

@@ -323,6 +324,91 @@ TEST_F(DBBlockCacheTest, IndexAndFilterBlocksStats) {
            filter_bytes_insert);
 }

+namespace {
+
+// A mock cache that wraps LRUCache and records how many entries have been
+// inserted at each priority. Counters are static, so tests must reset them.
+class MockCache : public LRUCache {
+ public:
+  static uint32_t high_pri_insert_count;
+  static uint32_t low_pri_insert_count;
+  // 32MB capacity, 1 shard (0 shard bits), no strict limit, pool ratio 0.
+  MockCache() : LRUCache(1 << 25, 0, false, 0.0) {}
+  // Counts the insert (non-LOW counts as high), then forwards to LRUCache.
+  virtual Status Insert(const Slice& key, void* value, size_t charge,
+                        void (*deleter)(const Slice& key, void* value),
+                        Handle** handle, Priority priority) override {
+    if (priority == Priority::LOW) {
+      low_pri_insert_count++;
+    } else {
+      high_pri_insert_count++;
+    }
+    return LRUCache::Insert(key, value, charge, deleter, handle, priority);
+  }
+};
+
+uint32_t MockCache::high_pri_insert_count = 0;
+uint32_t MockCache::low_pri_insert_count = 0;
+
+}  // anonymous namespace
+
+TEST_F(DBBlockCacheTest, IndexAndFilterBlocksCachePriority) {
+  for (auto priority : {Cache::Priority::LOW, Cache::Priority::HIGH}) {
+    Options options = CurrentOptions();
+    options.create_if_missing = true;
+    options.statistics = rocksdb::CreateDBStatistics();
+    BlockBasedTableOptions table_options;
+    table_options.cache_index_and_filter_blocks = true;
+    table_options.block_cache.reset(new MockCache());
+    table_options.filter_policy.reset(NewBloomFilterPolicy(20));
+    table_options.cache_index_and_filter_blocks_with_high_priority =
+        priority == Cache::Priority::HIGH ? true : false;
+    options.table_factory.reset(new BlockBasedTableFactory(table_options));
+    DestroyAndReopen(options);
+    // The counters are static: zero them for each priority iteration.
+    MockCache::high_pri_insert_count = 0;
+    MockCache::low_pri_insert_count = 0;
+
+    // Create a new table by flushing two keys to a single L0 file.
+    ASSERT_OK(Put("foo", "value"));
+    ASSERT_OK(Put("bar", "value"));
+    ASSERT_OK(Flush());
+    ASSERT_EQ(1, NumTableFilesAtLevel(0));
+
+    // Index/filter blocks are added to the cache right after table creation.
+    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
+    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+    ASSERT_EQ(2, /* only index/filter were added */
+              TestGetTickerCount(options, BLOCK_CACHE_ADD));
+    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
+    if (priority == Cache::Priority::LOW) {
+      ASSERT_EQ(0, MockCache::high_pri_insert_count);
+      ASSERT_EQ(2, MockCache::low_pri_insert_count);
+    } else {
+      ASSERT_EQ(2, MockCache::high_pri_insert_count);
+      ASSERT_EQ(0, MockCache::low_pri_insert_count);
+    }
+
+    // Read a key so that its data block is loaded into the cache.
+    ASSERT_EQ("value", Get("foo"));
+
+    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
+    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+    ASSERT_EQ(3, /* index + filter + data block */
+              TestGetTickerCount(options, BLOCK_CACHE_ADD));
+    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
+
+    // The data block should be inserted with low priority in both cases.
+    if (priority == Cache::Priority::LOW) {
+      ASSERT_EQ(0, MockCache::high_pri_insert_count);
+      ASSERT_EQ(3, MockCache::low_pri_insert_count);
+    } else {
+      ASSERT_EQ(2, MockCache::high_pri_insert_count);
+      ASSERT_EQ(1, MockCache::low_pri_insert_count);
+    }
+  }
+}
+
 TEST_F(DBBlockCacheTest, ParanoidFileChecks) {
  Options options = CurrentOptions();
  options.create_if_missing = true;

--- a/include/rocksdb/table.h
+++ b/include/rocksdb/table.h
@@ -20,6 +20,7 @@
 #include <string>
 #include <unordered_map>

+#include "rocksdb/cache.h"
 #include "rocksdb/env.h"
 #include "rocksdb/immutable_options.h"
 #include "rocksdb/iterator.h"
@@ -65,6 +66,12 @@ struct BlockBasedTableOptions {
  // block during table initialization.
  bool cache_index_and_filter_blocks = false;

+  // If cache_index_and_filter_blocks is enabled, cache index and filter
+  // blocks with high priority. If set to true, depending on implementation of
+  // block cache, index and filter blocks may be less likely to be evicted
+  // than data blocks.
+  bool cache_index_and_filter_blocks_with_high_priority = false;
+
  // if cache_index_and_filter_blocks is true and the below is true, then
  // filter and index blocks are stored in the cache, but a reference is
  // held in the "table reader" object so the blocks are pinned and only

--- a/table/block_based_table_reader.cc
+++ b/table/block_based_table_reader.cc
@@ -1029,8 +1029,11 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
    filter = ReadFilter(rep_);
    if (filter != nullptr) {
      assert(filter->size() > 0);
-      Status s = block_cache->Insert(key, filter, filter->size(),
-                                     &DeleteCachedFilterEntry, &cache_handle);
+      Status s = block_cache->Insert(
+          key, filter, filter->size(), &DeleteCachedFilterEntry, &cache_handle,
+          rep_->table_options.cache_index_and_filter_blocks_with_high_priority
+              ? Cache::Priority::HIGH
+              : Cache::Priority::LOW);
      if (s.ok()) {
        RecordTick(statistics, BLOCK_CACHE_ADD);
        RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, filter->size());
@@ -1092,8 +1095,12 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
    s = CreateIndexReader(&index_reader);
    if (s.ok()) {
      assert(index_reader != nullptr);
-      s = block_cache->Insert(key, index_reader, index_reader->usable_size(),
-                              &DeleteCachedIndexEntry, &cache_handle);
+      s = block_cache->Insert(
+          key, index_reader, index_reader->usable_size(),
+          &DeleteCachedIndexEntry, &cache_handle,
+          rep_->table_options.cache_index_and_filter_blocks_with_high_priority
+              ? Cache::Priority::HIGH
+              : Cache::Priority::LOW);
    }

    if (s.ok()) {

--- a/util/lru_cache.cc
+++ b/util/lru_cache.cc
@@ -405,48 +405,41 @@ size_t LRUCacheShard::GetPinnedUsage() const {
  return usage_ - lru_usage_;
 }

-class LRUCache : public ShardedCache {
- public:
-  LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
-           double high_pri_pool_ratio)
-      : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
-    int num_shards = 1 << num_shard_bits;
-    shards_ = new LRUCacheShard[num_shards];
-    SetCapacity(capacity);
-    SetStrictCapacityLimit(strict_capacity_limit);
-    for (int i = 0; i < num_shards; i++) {
-      shards_[i].SetHighPriorityPoolRatio(high_pri_pool_ratio);
-    }
+LRUCache::LRUCache(size_t capacity, int num_shard_bits,
+                   bool strict_capacity_limit, double high_pri_pool_ratio)
+    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
+  int num_shards = 1 << num_shard_bits;
+  shards_ = new LRUCacheShard[num_shards];
+  SetCapacity(capacity);
+  SetStrictCapacityLimit(strict_capacity_limit);
+  for (int i = 0; i < num_shards; i++) {
+    shards_[i].SetHighPriorityPoolRatio(high_pri_pool_ratio);
  }
+}

-  virtual ~LRUCache() { delete[] shards_; }
-
-  virtual const char* Name() const override { return "LRUCache"; }
-  virtual CacheShard* GetShard(int shard) override {
-    return reinterpret_cast<CacheShard*>(&shards_[shard]);
-  }
+LRUCache::~LRUCache() { delete[] shards_; }

-  virtual const CacheShard* GetShard(int shard) const override {
-    return reinterpret_cast<CacheShard*>(&shards_[shard]);
-  }
+CacheShard* LRUCache::GetShard(int shard) {
+  return reinterpret_cast<CacheShard*>(&shards_[shard]);
+}

-  virtual void* Value(Handle* handle) override {
-    return reinterpret_cast<const LRUHandle*>(handle)->value;
-  }
+const CacheShard* LRUCache::GetShard(int shard) const {
+  return reinterpret_cast<CacheShard*>(&shards_[shard]);
+}

-  virtual size_t GetCharge(Handle* handle) const override {
-    return reinterpret_cast<const LRUHandle*>(handle)->charge;
-  }
+void* LRUCache::Value(Handle* handle) {
+  return reinterpret_cast<const LRUHandle*>(handle)->value;
+}

-  virtual uint32_t GetHash(Handle* handle) const override {
-    return reinterpret_cast<const LRUHandle*>(handle)->hash;
-  }
+size_t LRUCache::GetCharge(Handle* handle) const {
+  return reinterpret_cast<const LRUHandle*>(handle)->charge;
+}

-  virtual void DisownData() override { shards_ = nullptr; }
+uint32_t LRUCache::GetHash(Handle* handle) const {
+  return reinterpret_cast<const LRUHandle*>(handle)->hash;
+}

- private:
-  LRUCacheShard* shards_;
-};
+void LRUCache::DisownData() { shards_ = nullptr; }

 std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
                                   bool strict_capacity_limit,

--- a/util/lru_cache.h
+++ b/util/lru_cache.h
@@ -248,4 +248,21 @@ class LRUCacheShard : public CacheShard {
  LRUHandleTable table_;
 };

+class LRUCache : public ShardedCache {  // Owns an array of LRUCacheShard.
+ public:
+  LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
+           double high_pri_pool_ratio);
+  virtual ~LRUCache();
+  virtual const char* Name() const override { return "LRUCache"; }
+  virtual CacheShard* GetShard(int shard) override;
+  virtual const CacheShard* GetShard(int shard) const override;
+  virtual void* Value(Handle* handle) override;
+  virtual size_t GetCharge(Handle* handle) const override;
+  virtual uint32_t GetHash(Handle* handle) const override;
+  virtual void DisownData() override;  // Nulls shards_ without freeing it.
+
+ private:
+  LRUCacheShard* shards_;  // new[]'d in the ctor, delete[]'d in the dtor.
+};
+
 }  // namespace rocksdb
--- a/util/options_helper.h
+++ b/util/options_helper.h
@@ -511,6 +511,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
         {offsetof(struct BlockBasedTableOptions,
                   cache_index_and_filter_blocks),
          OptionType::kBoolean, OptionVerificationType::kNormal}},
+        {"cache_index_and_filter_blocks_with_high_priority",
+         {offsetof(struct BlockBasedTableOptions,
+                   cache_index_and_filter_blocks_with_high_priority),
+          OptionType::kBoolean, OptionVerificationType::kNormal}},
        {"pin_l0_filter_and_index_blocks_in_cache",
         {offsetof(struct BlockBasedTableOptions,
                   pin_l0_filter_and_index_blocks_in_cache),

--- a/util/options_settable_test.cc
+++ b/util/options_settable_test.cc
@@ -149,6 +149,7 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) {
  ASSERT_OK(GetBlockBasedTableOptionsFromString(
      *bbto,
      "cache_index_and_filter_blocks=1;"
+      "cache_index_and_filter_blocks_with_high_priority=true;"
      "pin_l0_filter_and_index_blocks_in_cache=1;"
      "index_type=kHashSearch;"
      "checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;"