提交 3fe09371 编写于 作者: Z Zhongyi Xie 提交者: Facebook Github Bot

Use block cache to track memory usage when ReadOptions.fill_cache=false

Summary:
ReadOptions.fill_cache is set in compaction inputs and can be set by users in their queries too. It tells RocksDB not to put a data block it reads into the block cache.

The memory used by the data block is, however, not trackable by users.

To make the system more manageable, we can charge the block's memory to the block cache while it is in use, and then release the charge after use.
Closes https://github.com/facebook/rocksdb/pull/3333

Differential Revision: D6670230

Pulled By: miasantreble

fbshipit-source-id: ab848d3ed286bd081a13ee1903de357b56cbc308
上级 e2d4b0ef
...@@ -111,6 +111,31 @@ class DBBlockCacheTest : public DBTestBase { ...@@ -111,6 +111,31 @@ class DBBlockCacheTest : public DBTestBase {
} }
}; };
// Verify that when ReadOptions::fill_cache is false, the memory of a data
// block pinned by a live iterator is still charged to the block cache (via a
// dummy entry), and the charge is released when the iterator is destroyed.
TEST_F(DBBlockCacheTest, IteratorBlockCacheUsage) {
  ReadOptions read_options;
  read_options.fill_cache = false;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  // Capacity 0 with strict_capacity_limit=false: usage is tracked but
  // inserts are never rejected, so GetUsage() reflects exactly what the
  // iterator pins.
  std::shared_ptr<Cache> cache = NewLRUCache(0, 0, false);
  table_options.block_cache = cache;
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  ASSERT_EQ(0, cache->GetUsage());
  // unique_ptr: the iterator is released even if an ASSERT below returns
  // early (the original raw new/delete leaked it on assertion failure).
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  iter->Seek(ToString(0));
  // The block pinned by the iterator is costed to the cache.
  ASSERT_LT(0, cache->GetUsage());
  iter.reset();
  // Destroying the iterator drops the dummy entry, so usage returns to 0.
  ASSERT_EQ(0, cache->GetUsage());
}
TEST_F(DBBlockCacheTest, TestWithoutCompressedBlockCache) { TEST_F(DBBlockCacheTest, TestWithoutCompressedBlockCache) {
ReadOptions read_options; ReadOptions read_options;
auto table_options = GetTableOptions(); auto table_options = GetTableOptions();
......
...@@ -64,6 +64,8 @@ BlockBasedTable::~BlockBasedTable() { ...@@ -64,6 +64,8 @@ BlockBasedTable::~BlockBasedTable() {
delete rep_; delete rep_;
} }
// Monotonically increasing counter used to build a unique cache key for each
// dummy block-cache entry inserted when ReadOptions::fill_cache is false, so
// the memory of blocks that bypass the cache is still tracked by it.
std::atomic<uint64_t> BlockBasedTable::next_cache_key_id_(0);
namespace { namespace {
// Read the block identified by "handle" from "file". // Read the block identified by "handle" from "file".
// The only relevant option is options.verify_checksums for now. // The only relevant option is options.verify_checksums for now.
...@@ -114,6 +116,13 @@ void ReleaseCachedEntry(void* arg, void* h) { ...@@ -114,6 +116,13 @@ void ReleaseCachedEntry(void* arg, void* h) {
cache->Release(handle); cache->Release(handle);
} }
// Cleanup callback that releases a cache handle with force_erase=true,
// dropping the entry's ref count so the (dummy) entry is evicted rather
// than left resident in the cache.
void ForceReleaseCachedEntry(void* arg, void* h) {
  auto* owning_cache = static_cast<Cache*>(arg);
  auto* entry_handle = static_cast<Cache::Handle*>(h);
  owning_cache->Release(entry_handle, /*force_erase=*/true);
}
Slice GetCacheKeyFromOffset(const char* cache_key_prefix, Slice GetCacheKeyFromOffset(const char* cache_key_prefix,
size_t cache_key_prefix_size, uint64_t offset, size_t cache_key_prefix_size, uint64_t offset,
char* cache_key) { char* cache_key) {
...@@ -1508,6 +1517,39 @@ BlockIter* BlockBasedTable::NewDataBlockIterator( ...@@ -1508,6 +1517,39 @@ BlockIter* BlockBasedTable::NewDataBlockIterator(
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, iter->RegisterCleanup(&ReleaseCachedEntry, block_cache,
block.cache_handle); block.cache_handle);
} else { } else {
if (!ro.fill_cache && rep->cache_key_prefix_size != 0) {
// insert a dummy record to block cache to track the memory usage
Cache::Handle* cache_handle;
// There are two other types of cache keys: 1) SST cache key added in
// `MaybeLoadDataBlockToCache` 2) dummy cache key added in
// `write_buffer_manager`. Use longer prefix (41 bytes) to differentiate
// from SST cache key(31 bytes), and use non-zero prefix to
// differentiate from `write_buffer_manager`
const size_t kExtraCacheKeyPrefix = kMaxVarint64Length * 4 + 1;
char cache_key[kExtraCacheKeyPrefix + kMaxVarint64Length];
// Prefix: use rep->cache_key_prefix padded by 0s
memset(cache_key, 0, kExtraCacheKeyPrefix + kMaxVarint64Length);
assert(rep->cache_key_prefix_size != 0);
assert(rep->cache_key_prefix_size <= kExtraCacheKeyPrefix);
memcpy(cache_key, rep->cache_key_prefix, rep->cache_key_prefix_size);
char* end = EncodeVarint64(cache_key + kExtraCacheKeyPrefix,
next_cache_key_id_++);
assert(end - cache_key <=
static_cast<int>(kExtraCacheKeyPrefix + kMaxVarint64Length));
Slice unique_key =
Slice(cache_key, static_cast<size_t>(end - cache_key));
s = block_cache->Insert(unique_key, nullptr, block.value->usable_size(),
nullptr, &cache_handle);
if (s.ok()) {
if (cache_handle != nullptr) {
iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache,
cache_handle);
}
} else {
delete block.value;
block.value = nullptr;
}
}
iter->RegisterCleanup(&DeleteHeldResource<Block>, block.value, nullptr); iter->RegisterCleanup(&DeleteHeldResource<Block>, block.value, nullptr);
} }
} else { } else {
......
...@@ -214,6 +214,7 @@ class BlockBasedTable : public TableReader { ...@@ -214,6 +214,7 @@ class BlockBasedTable : public TableReader {
private: private:
friend class MockedBlockBasedTable; friend class MockedBlockBasedTable;
static std::atomic<uint64_t> next_cache_key_id_;
// input_iter: if it is not null, update this one and return it as Iterator // input_iter: if it is not null, update this one and return it as Iterator
static BlockIter* NewDataBlockIterator( static BlockIter* NewDataBlockIterator(
Rep* rep, const ReadOptions& ro, const Slice& index_value, Rep* rep, const ReadOptions& ro, const Slice& index_value,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册