提交 7e4e3814 编写于 作者: M Maysam Yabandeh 提交者: Facebook Github Bot

Fix the bloom filter skipping empty prefixes

Summary:
bc0da4b5 optimized bloom filters by skipping duplicate entires when the whole key and prefixes are both added to the bloom. It however used empty string as the initial value of the last entry added to the bloom. This is incorrect since empty key/prefix are valid entires by themselves. This patch fixes that.
Closes https://github.com/facebook/rocksdb/pull/3776

Differential Revision: D7778803

Pulled By: maysamyabandeh

fbshipit-source-id: d5a065daebee17f9403cac51e9d5626aac87bfbc
上级 e5a4dacf
......@@ -17,6 +17,8 @@ FullFilterBlockBuilder::FullFilterBlockBuilder(
FilterBitsBuilder* filter_bits_builder)
: prefix_extractor_(prefix_extractor),
whole_key_filtering_(whole_key_filtering),
last_whole_key_recorded_(false),
last_prefix_recorded_(false),
num_added_(0) {
assert(filter_bits_builder != nullptr);
filter_bits_builder_.reset(filter_bits_builder);
......@@ -33,9 +35,10 @@ void FullFilterBlockBuilder::Add(const Slice& key) {
// bits builder to properly detect the duplicates by comparing with the
// last item.
Slice last_whole_key = Slice(last_whole_key_str_);
if (last_whole_key.compare(key) != 0) {
if (!last_whole_key_recorded_ || last_whole_key.compare(key) != 0) {
AddKey(key);
last_whole_key_str_ = key.ToString();
last_whole_key_recorded_ = true;
last_whole_key_str_.assign(key.data(), key.size());
}
}
}
......@@ -59,9 +62,10 @@ inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
// bits builder to properly detect the duplicates by comparing with the last
// item.
Slice last_prefix = Slice(last_prefix_str_);
if (last_prefix.compare(prefix) != 0) {
if (!last_prefix_recorded_ || last_prefix.compare(prefix) != 0) {
AddKey(prefix);
last_prefix_str_ = prefix.ToString();
last_prefix_recorded_ = true;
last_prefix_str_.assign(prefix.data(), prefix.size());
}
} else {
AddKey(prefix);
......
......@@ -59,7 +59,9 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
// should NOT dereference them.
const SliceTransform* prefix_extractor_;
bool whole_key_filtering_;
bool last_whole_key_recorded_;
std::string last_whole_key_str_;
bool last_prefix_recorded_;
std::string last_prefix_str_;
uint32_t num_added_;
......
......@@ -162,6 +162,20 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) {
}
TEST_F(FullFilterBlockTest, DuplicateEntries) {
{ // empty prefixes
std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(0));
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
table_options_.filter_policy->GetFilterBitsBuilder());
const bool WHOLE_KEY = true;
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
bits_builder);
ASSERT_EQ(0, builder.NumAdded());
builder.Add("key"); // test with empty prefix
ASSERT_EQ(2, bits_builder->hash_entries_.size());
}
// mix of empty and non-empty
std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(7));
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
......@@ -170,13 +184,14 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) {
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
bits_builder);
ASSERT_EQ(0, builder.NumAdded());
builder.Add(""); // test with empty key too
builder.Add("prefix1key1");
builder.Add("prefix1key1");
builder.Add("prefix1key2");
builder.Add("prefix1key3");
builder.Add("prefix2key4");
// two prefix adn 4 keys
ASSERT_EQ(2 + 4, bits_builder->hash_entries_.size());
ASSERT_EQ(1 + 2 + 4, bits_builder->hash_entries_.size());
}
TEST_F(FullFilterBlockTest, SingleChunk) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册