From 3fe09371802db3e1c35d924bc76b0ebb252bdc0d Mon Sep 17 00:00:00 2001
From: Zhongyi Xie
Date: Mon, 29 Jan 2018 14:34:56 -0800
Subject: [PATCH] Use block cache to track memory usage when
 ReadOptions.fill_cache=false

Summary:
ReadOptions.fill_cache is set in compaction inputs and can also be set by
users in their queries. It tells RocksDB not to insert the data blocks it
reads into the block cache. The memory used by such a block is, however, not
visible to users. To make the system more manageable, we can charge the block
to the block cache while it is in use, and release the charge once we are
done with it.
Closes https://github.com/facebook/rocksdb/pull/3333

Differential Revision: D6670230

Pulled By: miasantreble

fbshipit-source-id: ab848d3ed286bd081a13ee1903de357b56cbc308
---
 db/db_block_cache_test.cc         | 25 ++++++++++++++++++
 table/block_based_table_reader.cc | 42 +++++++++++++++++++++++++++++++
 table/block_based_table_reader.h  |  1 +
 3 files changed, 68 insertions(+)

diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc
index 169cadc85..82c420c70 100644
--- a/db/db_block_cache_test.cc
+++ b/db/db_block_cache_test.cc
@@ -111,6 +111,31 @@ class DBBlockCacheTest : public DBTestBase {
   }
 };
 
+TEST_F(DBBlockCacheTest, IteratorBlockCacheUsage) {
+  ReadOptions read_options;
+  read_options.fill_cache = false;
+  auto table_options = GetTableOptions();
+  auto options = GetOptions(table_options);
+  InitTable(options);
+
+  std::shared_ptr<Cache> cache = NewLRUCache(0, 0, false);
+  table_options.block_cache = cache;
+  options.table_factory.reset(new BlockBasedTableFactory(table_options));
+  Reopen(options);
+  RecordCacheCounters(options);
+
+  std::vector<std::unique_ptr<Iterator>> iterators(kNumBlocks - 1);
+  Iterator* iter = nullptr;
+
+  ASSERT_EQ(0, cache->GetUsage());
+  iter = db_->NewIterator(read_options);
+  iter->Seek(ToString(0));
+  ASSERT_LT(0, cache->GetUsage());
+  delete iter;
+  iter = nullptr;
+  ASSERT_EQ(0, cache->GetUsage());
+}
+
 TEST_F(DBBlockCacheTest, TestWithoutCompressedBlockCache) {
   ReadOptions read_options;
   auto table_options = GetTableOptions();
diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc
index 808c5fd06..87bc766c1 100644
--- a/table/block_based_table_reader.cc
+++ b/table/block_based_table_reader.cc
@@ -64,6 +64,8 @@ BlockBasedTable::~BlockBasedTable() {
   delete rep_;
 }
 
+std::atomic<uint64_t> BlockBasedTable::next_cache_key_id_(0);
+
 namespace {
 // Read the block identified by "handle" from "file".
 // The only relevant option is options.verify_checksums for now.
@@ -114,6 +116,13 @@ void ReleaseCachedEntry(void* arg, void* h) {
   cache->Release(handle);
 }
 
+// Release the cached entry and decrement its ref count.
+void ForceReleaseCachedEntry(void* arg, void* h) {
+  Cache* cache = reinterpret_cast<Cache*>(arg);
+  Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
+  cache->Release(handle, true);
+}
+
 Slice GetCacheKeyFromOffset(const char* cache_key_prefix,
                             size_t cache_key_prefix_size, uint64_t offset,
                             char* cache_key) {
@@ -1508,6 +1517,39 @@ BlockIter* BlockBasedTable::NewDataBlockIterator(
       iter->RegisterCleanup(&ReleaseCachedEntry, block_cache,
                             block.cache_handle);
     } else {
+      if (!ro.fill_cache && rep->cache_key_prefix_size != 0) {
+        // insert a dummy record to block cache to track the memory usage
+        Cache::Handle* cache_handle;
+        // There are two other types of cache keys: 1) SST cache key added in
+        // `MaybeLoadDataBlockToCache` 2) dummy cache key added in
+        // `write_buffer_manager`. Use longer prefix (41 bytes) to differentiate
+        // from SST cache key (31 bytes), and use non-zero prefix to
+        // differentiate from `write_buffer_manager`
+        const size_t kExtraCacheKeyPrefix = kMaxVarint64Length * 4 + 1;
+        char cache_key[kExtraCacheKeyPrefix + kMaxVarint64Length];
+        // Prefix: use rep->cache_key_prefix padded by 0s
+        memset(cache_key, 0, kExtraCacheKeyPrefix + kMaxVarint64Length);
+        assert(rep->cache_key_prefix_size != 0);
+        assert(rep->cache_key_prefix_size <= kExtraCacheKeyPrefix);
+        memcpy(cache_key, rep->cache_key_prefix, rep->cache_key_prefix_size);
+        char* end = EncodeVarint64(cache_key + kExtraCacheKeyPrefix,
+                                   next_cache_key_id_++);
+        assert(end - cache_key <=
+               static_cast<int>(kExtraCacheKeyPrefix + kMaxVarint64Length));
+        Slice unique_key =
+            Slice(cache_key, static_cast<size_t>(end - cache_key));
+        s = block_cache->Insert(unique_key, nullptr, block.value->usable_size(),
+                                nullptr, &cache_handle);
+        if (s.ok()) {
+          if (cache_handle != nullptr) {
+            iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache,
+                                  cache_handle);
+          }
+        } else {
+          delete block.value;
+          block.value = nullptr;
+        }
+      }
       iter->RegisterCleanup(&DeleteHeldResource, block.value, nullptr);
     }
   } else {
diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h
index 5574c5d50..bddec470b 100644
--- a/table/block_based_table_reader.h
+++ b/table/block_based_table_reader.h
@@ -214,6 +214,7 @@ class BlockBasedTable : public TableReader {
 
  private:
   friend class MockedBlockBasedTable;
+  static std::atomic<uint64_t> next_cache_key_id_;
   // input_iter: if it is not null, update this one and return it as Iterator
   static BlockIter* NewDataBlockIterator(
       Rep* rep, const ReadOptions& ro, const Slice& index_value,
-- 
GitLab
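
The core trick in this patch, shown in isolation: charge a dummy (nullptr-value)
entry to the block cache for the block's size while the block is in use, then
release it with force_erase so the reservation disappears immediately. The
standalone sketch below (not part of the patch) illustrates the pattern against
RocksDB's public Cache API from rocksdb/cache.h; the key string, cache
capacity, and block size are made-up stand-ins for the prefix-plus-counter key
and block.value->usable_size() used in the patch.

#include <cassert>
#include <memory>
#include <string>

#include "rocksdb/cache.h"

int main() {
  // Any capacity large enough to hold the charge works for this sketch.
  std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(1 << 20);

  const size_t kBlockSize = 4096;  // stands in for block.value->usable_size()
  const std::string key = "dummy-key-0";  // the patch builds this from a
                                          // 41-byte prefix + next_cache_key_id_

  // Insert an entry with a nullptr value and no deleter: its only effect is
  // to add kBlockSize to the cache's tracked usage.
  rocksdb::Cache::Handle* handle = nullptr;
  rocksdb::Status s = cache->Insert(key, nullptr /* value */, kBlockSize,
                                    nullptr /* deleter */, &handle);
  assert(s.ok() && handle != nullptr);
  assert(cache->GetUsage() >= kBlockSize);  // the block is now accounted for

  // Release with force_erase=true, as ForceReleaseCachedEntry does, so the
  // dummy entry is erased immediately rather than lingering in the LRU list.
  cache->Release(handle, true /* force_erase */);
  assert(cache->GetUsage() < kBlockSize);
  return 0;
}

A plain Release(handle) would only unpin the entry and leave it in the LRU
list until eviction. Since a dummy entry will never be looked up again,
force_erase frees the charge right away, which is what lets the test's final
ASSERT_EQ(0, cache->GetUsage()) hold as soon as the iterator is deleted.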