提交 f706a9c1 编写于 作者: B Bo Wang 提交者: Facebook GitHub Bot

Add a secondary cache implementation based on LRUCache 1 (#9518)

Summary:
**Summary:**
RocksDB uses a block cache to reduce IO and make queries more efficient. The block cache is based on the LRU algorithm (LRUCache) and keeps objects containing uncompressed data, such as Block, ParsedFullFilterBlock etc. It allows the user to configure a second level cache (rocksdb::SecondaryCache) to extend the primary block cache by holding items evicted from it. Some of the major RocksDB users, like MyRocks, use direct IO and would like to use a primary block cache for uncompressed data and a secondary cache for compressed data. The latter allows us to mitigate the loss of the Linux page cache due to direct IO.

This PR includes a concrete implementation of rocksdb::SecondaryCache that integrates with compression libraries such as LZ4 and implements an LRU cache to hold compressed blocks.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9518

Test Plan:
In this PR, the lru_secondary_cache_test.cc includes the following tests:
1. The unit tests for the secondary cache with either compression or no compression, such as basic tests, fails tests.
2. The integration tests with both primary cache and this secondary cache .

**Follow Up:**

1. Statistics (e.g. compression ratio) will be added in another PR.
2. Once this implementation is ready, I will do some shadow testing and benchmarking with UDB to measure the impact.

Reviewed By: anand1976

Differential Revision: D34430930

Pulled By: gitbw95

fbshipit-source-id: 218d78b672a2f914856d8a90ff32f2f5b5043ded
上级 6f125998
......@@ -627,6 +627,7 @@ set(SOURCES
cache/cache_reservation_manager.cc
cache/clock_cache.cc
cache/lru_cache.cc
cache/lru_secondary_cache.cc
cache/sharded_cache.cc
db/arena_wrapped_db_iter.cc
db/blob/blob_fetcher.cc
......@@ -1154,6 +1155,7 @@ if(WITH_TESTS)
cache/cache_reservation_manager_test.cc
cache/cache_test.cc
cache/lru_cache_test.cc
cache/lru_secondary_cache_test.cc
db/blob/blob_counting_iterator_test.cc
db/blob/blob_file_addition_test.cc
db/blob/blob_file_builder_test.cc
......
......@@ -1897,6 +1897,9 @@ statistics_test: $(OBJ_DIR)/monitoring/statistics_test.o $(TEST_LIBRARY) $(LIBRA
stats_history_test: $(OBJ_DIR)/monitoring/stats_history_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
lru_secondary_cache_test: $(OBJ_DIR)/cache/lru_secondary_cache_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
lru_cache_test: $(OBJ_DIR)/cache/lru_cache_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
......
......@@ -15,6 +15,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[
"cache/cache_reservation_manager.cc",
"cache/clock_cache.cc",
"cache/lru_cache.cc",
"cache/lru_secondary_cache.cc",
"cache/sharded_cache.cc",
"db/arena_wrapped_db_iter.cc",
"db/blob/blob_fetcher.cc",
......@@ -331,6 +332,7 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
"cache/cache_reservation_manager.cc",
"cache/clock_cache.cc",
"cache/lru_cache.cc",
"cache/lru_secondary_cache.cc",
"cache/sharded_cache.cc",
"db/arena_wrapped_db_iter.cc",
"db/blob/blob_fetcher.cc",
......@@ -9096,6 +9098,12 @@ cpp_unittest_wrapper(name="lru_cache_test",
extra_compiler_flags=[])
cpp_unittest_wrapper(name="lru_secondary_cache_test",
srcs=["cache/lru_secondary_cache_test.cc"],
deps=[":rocksdb_test_lib"],
extra_compiler_flags=[])
cpp_unittest_wrapper(name="manual_compaction_test",
srcs=["db/manual_compaction_test.cc"],
deps=[":rocksdb_test_lib"],
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/lru_secondary_cache.h"
#include <memory>
#include "memory/memory_allocator.h"
#include "util/compression.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
namespace {
void DeletionCallback(const Slice& /*key*/, void* obj) {
delete reinterpret_cast<CacheAllocationPtr*>(obj);
obj = nullptr;
}
} // namespace
LRUSecondaryCache::LRUSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
CompressionType compression_type, uint32_t compress_format_version)
: cache_options_(capacity, num_shard_bits, strict_capacity_limit,
high_pri_pool_ratio, memory_allocator, use_adaptive_mutex,
metadata_charge_policy, compression_type,
compress_format_version) {
cache_ = NewLRUCache(capacity, num_shard_bits, strict_capacity_limit,
high_pri_pool_ratio, memory_allocator,
use_adaptive_mutex, metadata_charge_policy);
}
LRUSecondaryCache::~LRUSecondaryCache() { cache_.reset(); }
std::unique_ptr<SecondaryCacheResultHandle> LRUSecondaryCache::Lookup(
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/) {
std::unique_ptr<SecondaryCacheResultHandle> handle;
Cache::Handle* lru_handle = cache_->Lookup(key);
if (lru_handle == nullptr) {
return handle;
}
CacheAllocationPtr* ptr =
reinterpret_cast<CacheAllocationPtr*>(cache_->Value(lru_handle));
void* value = nullptr;
size_t charge = 0;
Status s;
if (cache_options_.compression_type == kNoCompression) {
s = create_cb(ptr->get(), cache_->GetCharge(lru_handle), &value, &charge);
} else {
UncompressionContext uncompression_context(cache_options_.compression_type);
UncompressionInfo uncompression_info(uncompression_context,
UncompressionDict::GetEmptyDict(),
cache_options_.compression_type);
size_t uncompressed_size = 0;
CacheAllocationPtr uncompressed;
uncompressed = UncompressData(
uncompression_info, (char*)ptr->get(), cache_->GetCharge(lru_handle),
&uncompressed_size, cache_options_.compress_format_version,
cache_options_.memory_allocator.get());
if (!uncompressed) {
cache_->Release(lru_handle, true);
return handle;
}
s = create_cb(uncompressed.get(), uncompressed_size, &value, &charge);
}
if (!s.ok()) {
cache_->Release(lru_handle, true);
return handle;
}
handle.reset(new LRUSecondaryCacheResultHandle(value, charge));
cache_->Release(lru_handle);
return handle;
}
Status LRUSecondaryCache::Insert(const Slice& key, void* value,
const Cache::CacheItemHelper* helper) {
size_t size = (*helper->size_cb)(value);
CacheAllocationPtr ptr =
AllocateBlock(size, cache_options_.memory_allocator.get());
Status s = (*helper->saveto_cb)(value, 0, size, ptr.get());
if (!s.ok()) {
return s;
}
Slice val(ptr.get(), size);
std::string compressed_val;
if (cache_options_.compression_type != kNoCompression) {
CompressionOptions compression_opts;
CompressionContext compression_context(cache_options_.compression_type);
uint64_t sample_for_compression = 0;
CompressionInfo compression_info(
compression_opts, compression_context, CompressionDict::GetEmptyDict(),
cache_options_.compression_type, sample_for_compression);
bool success =
CompressData(val, compression_info,
cache_options_.compress_format_version, &compressed_val);
if (!success) {
return Status::Corruption("Error compressing value.");
}
val = Slice(compressed_val);
size = compressed_val.size();
ptr = AllocateBlock(size, cache_options_.memory_allocator.get());
memcpy(ptr.get(), compressed_val.data(), size);
}
CacheAllocationPtr* buf = new CacheAllocationPtr(std::move(ptr));
return cache_->Insert(key, buf, size, DeletionCallback);
}
void LRUSecondaryCache::Erase(const Slice& key) { cache_->Erase(key); }
std::string LRUSecondaryCache::GetPrintableOptions() const {
std::string ret;
ret.reserve(20000);
const int kBufferSize = 200;
char buffer[kBufferSize];
ret.append(cache_->GetPrintableOptions());
snprintf(buffer, kBufferSize, " compression_type : %s\n",
CompressionTypeToString(cache_options_.compression_type).c_str());
ret.append(buffer);
snprintf(buffer, kBufferSize, " compression_type : %d\n",
cache_options_.compress_format_version);
ret.append(buffer);
return ret;
}
std::shared_ptr<SecondaryCache> NewLRUSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
CompressionType compression_type, uint32_t compress_format_version) {
return std::make_shared<LRUSecondaryCache>(
capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
memory_allocator, use_adaptive_mutex, metadata_charge_policy,
compression_type, compress_format_version);
}
std::shared_ptr<SecondaryCache> NewLRUSecondaryCache(
const LRUSecondaryCacheOptions& opts) {
// The secondary_cache is disabled for this LRUCache instance.
assert(opts.secondary_cache == nullptr);
return NewLRUSecondaryCache(
opts.capacity, opts.num_shard_bits, opts.strict_capacity_limit,
opts.high_pri_pool_ratio, opts.memory_allocator, opts.use_adaptive_mutex,
opts.metadata_charge_policy, opts.compression_type,
opts.compress_format_version);
}
} // namespace ROCKSDB_NAMESPACE
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <memory>
#include "cache/lru_cache.h"
#include "memory/memory_allocator.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "util/compression.h"
namespace ROCKSDB_NAMESPACE {
class LRUSecondaryCacheResultHandle : public SecondaryCacheResultHandle {
public:
LRUSecondaryCacheResultHandle(void* value, size_t size)
: value_(value), size_(size) {}
virtual ~LRUSecondaryCacheResultHandle() override = default;
LRUSecondaryCacheResultHandle(const LRUSecondaryCacheResultHandle&) = delete;
LRUSecondaryCacheResultHandle& operator=(
const LRUSecondaryCacheResultHandle&) = delete;
bool IsReady() override { return true; }
void Wait() override {}
void* Value() override { return value_; }
size_t Size() override { return size_; }
private:
void* value_;
size_t size_;
};
// The LRUSecondaryCache is a concrete implementation of
// rocksdb::SecondaryCache.
//
// Users can also cast a pointer to it and call methods on
// it directly, especially custom methods that may be added
// in the future. For example -
// std::unique_ptr<rocksdb::SecondaryCache> cache =
// NewLRUSecondaryCache(opts);
// static_cast<LRUSecondaryCache*>(cache.get())->Erase(key);
class LRUSecondaryCache : public SecondaryCache {
public:
LRUSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata,
CompressionType compression_type = CompressionType::kLZ4Compression,
uint32_t compress_format_version = 2);
virtual ~LRUSecondaryCache() override;
const char* Name() const override { return "LRUSecondaryCache"; }
Status Insert(const Slice& key, void* value,
const Cache::CacheItemHelper* helper) override;
std::unique_ptr<SecondaryCacheResultHandle> Lookup(
const Slice& key, const Cache::CreateCallback& create_cb,
bool /*wait*/) override;
void Erase(const Slice& key) override;
void WaitAll(std::vector<SecondaryCacheResultHandle*> /*handles*/) override {}
std::string GetPrintableOptions() const override;
private:
std::shared_ptr<Cache> cache_;
LRUSecondaryCacheOptions cache_options_;
};
} // namespace ROCKSDB_NAMESPACE
此差异已折叠。
......@@ -27,6 +27,7 @@
#include <memory>
#include <string>
#include "rocksdb/compression_type.h"
#include "rocksdb/memory_allocator.h"
#include "rocksdb/slice.h"
#include "rocksdb/statistics.h"
......@@ -127,6 +128,53 @@ extern std::shared_ptr<Cache> NewLRUCache(
extern std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts);
// EXPERIMENTAL
// Options structure for configuring a SecondaryCache instance based on
// LRUCache. The LRUCacheOptions.secondary_cache is not used and
// should not be set.
struct LRUSecondaryCacheOptions : LRUCacheOptions {
// The compression method (if any) that is used to compress data.
CompressionType compression_type = CompressionType::kLZ4Compression;
// compress_format_version can have two values:
// compress_format_version == 1 -- decompressed size is not included in the
// block header.
// compress_format_version == 2 -- decompressed size is included in the block
// header in varint32 format.
uint32_t compress_format_version = 2;
LRUSecondaryCacheOptions() {}
LRUSecondaryCacheOptions(
size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit,
double _high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
bool _use_adaptive_mutex = kDefaultToAdaptiveMutex,
CacheMetadataChargePolicy _metadata_charge_policy =
kDefaultCacheMetadataChargePolicy,
CompressionType _compression_type = CompressionType::kLZ4Compression,
uint32_t _compress_format_version = 2)
: LRUCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
_high_pri_pool_ratio, std::move(_memory_allocator),
_use_adaptive_mutex, _metadata_charge_policy),
compression_type(_compression_type),
compress_format_version(_compress_format_version) {}
};
// EXPERIMENTAL
// Create a new Secondary Cache that is implemented on top of LRUCache.
extern std::shared_ptr<SecondaryCache> NewLRUSecondaryCache(
size_t capacity, int num_shard_bits = -1,
bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
CacheMetadataChargePolicy metadata_charge_policy =
kDefaultCacheMetadataChargePolicy,
CompressionType compression_type = CompressionType::kLZ4Compression,
uint32_t compress_format_version = 2);
extern std::shared_ptr<SecondaryCache> NewLRUSecondaryCache(
const LRUSecondaryCacheOptions& opts);
// Similar to NewLRUCache, but create a cache based on CLOCK algorithm with
// better concurrent performance in some cases. See util/clock_cache.cc for
// more detail.
......
......@@ -6,6 +6,7 @@ LIB_SOURCES = \
cache/cache_reservation_manager.cc \
cache/clock_cache.cc \
cache/lru_cache.cc \
cache/lru_secondary_cache.cc \
cache/sharded_cache.cc \
db/arena_wrapped_db_iter.cc \
db/blob/blob_fetcher.cc \
......@@ -400,6 +401,7 @@ TEST_MAIN_SOURCES = \
cache/cache_test.cc \
cache/cache_reservation_manager_test.cc \
cache/lru_cache_test.cc \
cache/lru_secondary_cache_test.cc \
db/blob/blob_counting_iterator_test.cc \
db/blob/blob_file_addition_test.cc \
db/blob/blob_file_builder_test.cc \
......
......@@ -556,6 +556,38 @@ DEFINE_double(cache_high_pri_pool_ratio, 0.0,
DEFINE_bool(use_clock_cache, false,
"Replace default LRU block cache with clock cache.");
DEFINE_bool(use_lru_secondary_cache, false,
"Use the LRUSecondaryCache as the secondary cache.");
DEFINE_int64(lru_secondary_cache_size, 8 << 20, // 8MB
"Number of bytes to use as a cache of data");
DEFINE_int32(lru_secondary_cache_numshardbits, 6,
"Number of shards for the block cache"
" is 2 ** lru_secondary_cache_numshardbits."
" Negative means use default settings."
" This is applied only if FLAGS_cache_size is non-negative.");
DEFINE_double(lru_secondary_cache_high_pri_pool_ratio, 0.0,
"Ratio of block cache reserve for high pri blocks. "
"If > 0.0, we also enable "
"cache_index_and_filter_blocks_with_high_priority.");
DEFINE_string(lru_secondary_cache_compression_type, "lz4",
"The compression algorithm to use for large "
"values stored in LRUSecondaryCache.");
static enum ROCKSDB_NAMESPACE::CompressionType
FLAGS_lru_secondary_cache_compression_type_e =
ROCKSDB_NAMESPACE::kLZ4Compression;
DEFINE_uint32(
lru_secondary_cache_compress_format_version, 2,
"compress_format_version can have two values: "
"compress_format_version == 1 -- decompressed size is not included"
" in the block header."
"compress_format_version == 2 -- decompressed size is included"
" in the block header in varint32 format.");
DEFINE_int64(simcache_size, -1,
"Number of bytes to use as a simcache of "
"uncompressed data. Nagative value disables simcache.");
......@@ -2791,6 +2823,21 @@ class Benchmark {
opts.secondary_cache = secondary_cache;
}
#endif // ROCKSDB_LITE
if (FLAGS_use_lru_secondary_cache) {
LRUSecondaryCacheOptions secondary_cache_opts;
secondary_cache_opts.capacity = FLAGS_lru_secondary_cache_size;
secondary_cache_opts.num_shard_bits =
FLAGS_lru_secondary_cache_numshardbits;
secondary_cache_opts.high_pri_pool_ratio =
FLAGS_lru_secondary_cache_high_pri_pool_ratio;
secondary_cache_opts.compression_type =
FLAGS_lru_secondary_cache_compression_type_e;
secondary_cache_opts.compress_format_version =
FLAGS_lru_secondary_cache_compress_format_version;
opts.secondary_cache = NewLRUSecondaryCache(secondary_cache_opts);
}
return NewLRUCache(opts);
}
}
......@@ -7961,6 +8008,9 @@ int db_bench_tool(int argc, char** argv) {
FLAGS_wal_compression_e =
StringToCompressionType(FLAGS_wal_compression.c_str());
FLAGS_lru_secondary_cache_compression_type_e = StringToCompressionType(
FLAGS_lru_secondary_cache_compression_type.c_str());
#ifndef ROCKSDB_LITE
// Stacked BlobDB
FLAGS_blob_db_compression_type_e =
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册