Commit 89833577 authored by Maysam Yabandeh, committed by Facebook Github Bot

Delete filter before closing the table

Summary:
Some filters, such as the partitioned filter, hold pointers to the table for which they were created. Therefore, if they are stored in the block cache, they should be forcibly erased from the block cache before the table is closed, which results in the object being deleted. Otherwise the destructor will be called later, when the cache lazily erases the object, and since the parent table no longer exists at that point, this could result in undefined behavior.
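A hedged sketch of the close-time force-erase pattern this change introduces. The `MyTable`, `block_cache_`, and `filter_handle_` names are illustrative, not the actual RocksDB members; only the `Cache::Release(handle, force_erase)` overload visible in the diff below is assumed:

void MyTable::Close() {
  // Hedged sketch, not the committed code. With force_erase == true the entry
  // is erased from the block cache as soon as our reference is dropped, so its
  // destructor runs now, while this table is still alive, instead of at some
  // later lazy eviction when the table may already be gone.
  if (block_cache_ != nullptr && filter_handle_ != nullptr) {
    block_cache_->Release(filter_handle_, true /* force_erase */);
    filter_handle_ = nullptr;
  }
}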

Update: there will still be cases where the filter is not removed from the cache, because the table has not kept a pointer to the cache handle and therefore cannot forcibly release it later. To cover such cases, we make sure that the filter destructor does not access the table pointer.
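A condensed, hedged sketch of that workaround, simplified from the PartitionedFilterBlockReader changes in the diff below: the raw Cache* is captured at construction time, so the destructor never dereferences the possibly-deleted table:

// Hedged sketch, simplified from the diff below.
PartitionedFilterBlockReader::PartitionedFilterBlockReader(/* ... */)
    : table_(table),
      // Capture the cache pointer now; table_ may be gone by destruction time.
      block_cache_(table_->rep_->table_options.block_cache.get()) { /* ... */ }

PartitionedFilterBlockReader::~PartitionedFilterBlockReader() {
  // May run via lazy cache eviction after the table has been deleted, so only
  // the captured block_cache_ is used here, never table_.
  if (block_cache_ != nullptr) {
    for (Cache::Handle* handle : handle_list_) {
      block_cache_->Release(handle);
    }
  }
}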
Closes https://github.com/facebook/rocksdb/pull/2207

Differential Revision: D4941591

Pulled By: maysamyabandeh

fbshipit-source-id: 56fbab2a11cf447e1aa67caa30b58d7bd7ce5bbd
Parent 47a09b0a
@@ -101,6 +101,11 @@ Status BlockBasedTableFactory::SanitizeOptions(
"Unsupported BlockBasedTable format_version. Please check "
"include/rocksdb/table.h for more info");
}
if (table_options_.block_cache &&
table_options_.no_block_cache) {
return Status::InvalidArgument("Block cache is initialized"
", but it is disabled");
}
return Status::OK();
}
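For context, a hedged example of the contradictory configuration the new check above rejects (illustrative fragment only; the exact error text is in the hunk, and `NewBlockBasedTableFactory` is the public constructor used here instead of the internal class):

// Illustrative only: a block cache is supplied while block caching is also
// disabled, so option sanitization is expected to return
// Status::InvalidArgument instead of silently accepting the options.
rocksdb::BlockBasedTableOptions table_options;
table_options.block_cache = rocksdb::NewLRUCache(8 * 1024 * 1024);
table_options.no_block_cache = true;  // contradicts the line above
rocksdb::Options options;
options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));
// Opening a DB/table with these options should now surface that status.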
@@ -2063,21 +2063,17 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
}
void BlockBasedTable::Close() {
rep_->filter_entry.Release(rep_->table_options.block_cache.get());
rep_->index_entry.Release(rep_->table_options.block_cache.get());
rep_->range_del_entry.Release(rep_->table_options.block_cache.get());
// cleanup index and filter blocks to avoid accessing dangling pointer
if (!rep_->table_options.no_block_cache) {
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
// Get the filter block key
auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
rep_->footer.metaindex_handle(), cache_key);
rep_->table_options.block_cache.get()->Erase(key);
// Get the index block key
key = GetCacheKeyFromOffset(rep_->cache_key_prefix,
rep_->cache_key_prefix_size,
rep_->dummy_index_reader_offset, cache_key);
rep_->table_options.block_cache.get()->Erase(key);
const bool force_erase = true;
auto block_cache = rep_->table_options.block_cache.get();
if (block_cache) {
// The filter_entry's and index_entry's lifetimes end with the table's, so
// they are forcibly released here.
// Note that if an xxx_entry is not set, the cached entry might remain in the
// cache for some more time. The destructor must hence take into account that
// the table object might no longer be available.
rep_->filter_entry.Release(block_cache, force_erase);
rep_->index_entry.Release(block_cache, force_erase);
rep_->range_del_entry.Release(block_cache);
}
}
@@ -370,9 +370,10 @@ struct BlockBasedTable::CachableEntry {
CachableEntry(TValue* _value, Cache::Handle* _cache_handle)
: value(_value), cache_handle(_cache_handle) {}
CachableEntry() : CachableEntry(nullptr, nullptr) {}
void Release(Cache* cache) {
void Release(Cache* cache, bool force_erase = false) {
if (cache_handle) {
cache->Release(cache_handle);
bool erased = cache->Release(cache_handle, force_erase);
assert(!force_erase || erased);
value = nullptr;
cache_handle = nullptr;
}
@@ -82,16 +82,21 @@ PartitionedFilterBlockReader::PartitionedFilterBlockReader(
: FilterBlockReader(contents.data.size(), stats, _whole_key_filtering),
prefix_extractor_(prefix_extractor),
comparator_(comparator),
table_(table) {
table_(table),
block_cache_(table_->rep_->table_options.block_cache.get()) {
idx_on_fltr_blk_.reset(new Block(std::move(contents),
kDisableGlobalSequenceNumber,
0 /* read_amp_bytes_per_bit */, stats));
}
PartitionedFilterBlockReader::~PartitionedFilterBlockReader() {
ReadLock rl(&mu_);
for (auto it = handle_list_.begin(); it != handle_list_.end(); ++it) {
table_->rep_->table_options.block_cache.get()->Release(*it);
// The destructor might be called via cache eviction after the table has been
// deleted. We should therefore avoid using the table_ pointer in the destructor.
if (block_cache_) {
ReadLock rl(&mu_);
for (auto it = handle_list_.begin(); it != handle_list_.end(); ++it) {
block_cache_->Release(*it);
}
}
}
@@ -122,7 +127,7 @@ bool PartitionedFilterBlockReader::KeyMayMatch(
return res;
}
if (LIKELY(filter_partition.IsSet())) {
filter_partition.Release(table_->rep_->table_options.block_cache.get());
filter_partition.Release(block_cache_);
} else {
delete filter_partition.value;
}
@@ -154,7 +159,7 @@ bool PartitionedFilterBlockReader::PrefixMayMatch(
return res;
}
if (LIKELY(filter_partition.IsSet())) {
filter_partition.Release(table_->rep_->table_options.block_cache.get());
filter_partition.Release(block_cache_);
} else {
delete filter_partition.value;
}
@@ -182,8 +187,7 @@ PartitionedFilterBlockReader::GetFilterPartition(Slice* handle_value,
auto s = fltr_blk_handle.DecodeFrom(handle_value);
assert(s.ok());
const bool is_a_filter_partition = true;
auto block_cache = table_->rep_->table_options.block_cache.get();
if (LIKELY(block_cache != nullptr)) {
if (LIKELY(block_cache_ != nullptr)) {
bool pin_cached_filters =
GetLevel() == 0 &&
table_->rep_->table_options.pin_l0_filter_and_index_blocks_in_cache;
@@ -85,6 +85,7 @@ class PartitionedFilterBlockReader : public FilterBlockReader {
std::unique_ptr<Block> idx_on_fltr_blk_;
const Comparator& comparator_;
const BlockBasedTable* table_;
Cache* block_cache_;
std::unordered_map<uint64_t, FilterBlockReader*> filter_cache_;
autovector<Cache::Handle*> handle_list_;
port::RWMutex mu_;
@@ -9,7 +9,6 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <inttypes.h>
#include <stdio.h>
#include <algorithm>
@@ -1850,7 +1849,10 @@ TEST_F(BlockBasedTableTest, FilterBlockInBlockCache) {
// -- Table construction
Options options;
options.create_if_missing = true;
options.statistics = CreateDBStatistics();
// Keep a ref to the statistics object to prevent it from being destructed
// before the block cache gets cleaned up upon the next table_factory.reset
auto statistics = CreateDBStatistics();
options.statistics = statistics;
// Enable the cache for index/filter blocks
BlockBasedTableOptions table_options;
@@ -1930,15 +1932,17 @@ TEST_F(BlockBasedTableTest, FilterBlockInBlockCache) {
}
// release the iterator so that the block cache can reset correctly.
iter.reset();
c.ResetTableReader();
// -- PART 2: Open with very small block cache
// In this test, no block will ever get hit since the block cache is
// too small to fit even one entry.
table_options.block_cache = NewLRUCache(1, 4);
options.statistics = CreateDBStatistics();
options.table_factory.reset(new BlockBasedTableFactory(table_options));
// Keep a ref to the statistics object to prevent it from being destructed
// before the block cache gets cleaned up upon the next table_factory.reset
statistics = CreateDBStatistics();
options.statistics = statistics;
const ImmutableCFOptions ioptions2(options);
c.Reopen(ioptions2);
{
@@ -1993,7 +1997,10 @@ TEST_F(BlockBasedTableTest, FilterBlockInBlockCache) {
// Open table with filter policy
table_options.filter_policy.reset(NewBloomFilterPolicy(1));
options.table_factory.reset(new BlockBasedTableFactory(table_options));
options.statistics = CreateDBStatistics();
// Keep a ref to the statistics object to prevent it from being destructed
// before the block cache gets cleaned up upon the next table_factory.reset
statistics = CreateDBStatistics();
options.statistics = statistics;
ImmutableCFOptions ioptions4(options);
ASSERT_OK(c3.Reopen(ioptions4));
reader = dynamic_cast<BlockBasedTable*>(c3.GetTableReader());