From 108c619acbbbcef0e331f53270af8aad25150a97 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 9 Sep 2019 14:49:39 -0700 Subject: [PATCH] Add regression test for serialized Bloom filters (#5778) Summary: Check that we don't accidentally change the on-disk format of existing Bloom filter implementations, including for various CACHE_LINE_SIZE values (verified by temporarily changing CACHE_LINE_SIZE). Pull Request resolved: https://github.com/facebook/rocksdb/pull/5778 Test Plan: thisisthetest Differential Revision: D17269630 Pulled By: pdillinger fbshipit-source-id: c77017662f010a77603b7d475892b1f0d5563d8b --- db/plain_table_db_test.cc | 15 +++- util/bloom_test.cc | 164 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 171 insertions(+), 8 deletions(-) diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index a2f191080..52c7478a4 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -770,8 +770,19 @@ TEST_P(PlainTableDBTest, BloomSchema) { for (unsigned i = 0; i < 32; ++i) { // Known pattern of Bloom filter false positives can detect schema change // with high probability. Known FPs stuffed into bits: - bool expect_fp = (bloom_locality ? 
2421694657UL : 1785868347UL) - & (1UL << i); + uint32_t pattern; + if (!bloom_locality) { + pattern = 1785868347UL; + } else if (CACHE_LINE_SIZE == 64) { + pattern = 2421694657UL; + } else if (CACHE_LINE_SIZE == 128) { + pattern = 788710956UL; + } else { + ASSERT_EQ(CACHE_LINE_SIZE, 256); + pattern = 163905UL; + } + bool expect_fp = pattern & (1UL << i); + //fprintf(stderr, "expect_fp@%u: %d\n", i, (int)expect_fp); expect_bloom_not_match = !expect_fp; ASSERT_EQ("NOT_FOUND", Get(NthKey(i, 'n'))); } diff --git a/util/bloom_test.cc b/util/bloom_test.cc index 688920ea0..b75930399 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -23,6 +23,7 @@ int main() { #include "table/full_filter_bits_builder.h" #include "test_util/testharness.h" #include "test_util/testutil.h" +#include "util/hash.h" #include "util/gflags_compat.h" using GFLAGS_NAMESPACE::ParseCommandLineFlags; @@ -55,7 +56,7 @@ static int NextLength(int length) { class BloomTest : public testing::Test { private: - const FilterPolicy* policy_; + std::unique_ptr policy_; std::string filter_; std::vector keys_; @@ -63,13 +64,20 @@ class BloomTest : public testing::Test { BloomTest() : policy_( NewBloomFilterPolicy(FLAGS_bits_per_key)) {} - ~BloomTest() override { delete policy_; } - void Reset() { keys_.clear(); filter_.clear(); } + void ResetPolicy(const FilterPolicy* policy = nullptr) { + if (policy == nullptr) { + policy_.reset(NewBloomFilterPolicy(FLAGS_bits_per_key)); + } else { + policy_.reset(policy); + } + Reset(); + } + void Add(const Slice& s) { keys_.push_back(s.ToString()); } @@ -90,6 +98,10 @@ class BloomTest : public testing::Test { return filter_.size(); } + Slice FilterData() const { + return Slice(filter_); + } + void DumpFilter() { fprintf(stderr, "F("); for (size_t i = 0; i+1 < filter_.size(); i++) { @@ -173,11 +185,62 @@ TEST_F(BloomTest, VaryingLengths) { ASSERT_LE(mediocre_filters, good_filters/5); } +// Ensure the implementation doesn't accidentally change in an +// incompatible way 
+TEST_F(BloomTest, Schema) { + char buffer[sizeof(int)]; + + ResetPolicy(NewBloomFilterPolicy(8)); // num_probes = 5 + for (int key = 0; key < 87; key++) { + Add(Key(key, buffer)); + } + Build(); + ASSERT_EQ(BloomHash(FilterData()), 3589896109U); + + ResetPolicy(NewBloomFilterPolicy(9)); // num_probes = 6 + for (int key = 0; key < 87; key++) { + Add(Key(key, buffer)); + } + Build(); + ASSERT_EQ(BloomHash(FilterData()), 969445585); + + ResetPolicy(NewBloomFilterPolicy(11)); // num_probes = 7 + for (int key = 0; key < 87; key++) { + Add(Key(key, buffer)); + } + Build(); + ASSERT_EQ(BloomHash(FilterData()), 1694458207); + + ResetPolicy(NewBloomFilterPolicy(10)); // num_probes = 6 + for (int key = 0; key < 87; key++) { + Add(Key(key, buffer)); + } + Build(); + ASSERT_EQ(BloomHash(FilterData()), 2373646410U); + + ResetPolicy(NewBloomFilterPolicy(10)); + for (int key = 1; key < 87; key++) { + Add(Key(key, buffer)); + } + Build(); + ASSERT_EQ(BloomHash(FilterData()), 1908442116); + + ResetPolicy(NewBloomFilterPolicy(10)); + for (int key = 1; key < 88; key++) { + Add(Key(key, buffer)); + } + Build(); + ASSERT_EQ(BloomHash(FilterData()), 3057004015U); + + ResetPolicy(); +} + + // Different bits-per-byte class FullBloomTest : public testing::Test { private: - const FilterPolicy* policy_; + std::unique_ptr policy_; std::unique_ptr bits_builder_; std::unique_ptr bits_reader_; std::unique_ptr buf_; @@ -190,8 +253,6 @@ class FullBloomTest : public testing::Test { Reset(); } - ~FullBloomTest() override { delete policy_; } - FullFilterBitsBuilder* GetFullFilterBitsBuilder() { return dynamic_cast(bits_builder_.get()); } @@ -203,6 +264,15 @@ class FullBloomTest : public testing::Test { filter_size_ = 0; } + void ResetPolicy(const FilterPolicy* policy = nullptr) { + if (policy == nullptr) { + policy_.reset(NewBloomFilterPolicy(FLAGS_bits_per_key, false)); + } else { + policy_.reset(policy); + } + Reset(); + } + void Add(const Slice& s) { bits_builder_->AddKey(s); } @@ -217,6 +287,10 
@@ class FullBloomTest : public testing::Test { return filter_size_; } + Slice FilterData() { + return Slice(buf_.get(), filter_size_); + } + bool Matches(const Slice& s) { if (bits_reader_ == nullptr) { Build(); @@ -305,6 +379,84 @@ TEST_F(FullBloomTest, FullVaryingLengths) { ASSERT_LE(mediocre_filters, good_filters/5); } +namespace { +inline uint32_t SelectByCacheLineSize(uint32_t for64, + uint32_t for128, + uint32_t for256) { + (void)for64; + (void)for128; + (void)for256; +#if CACHE_LINE_SIZE == 64 + return for64; +#elif CACHE_LINE_SIZE == 128 + return for128; +#elif CACHE_LINE_SIZE == 256 + return for256; +#else + #error "CACHE_LINE_SIZE unknown or unrecognized" +#endif +} +} // namespace + +// Ensure the implementation doesn't accidentally change in an +// incompatible way +TEST_F(FullBloomTest, Schema) { + char buffer[sizeof(int)]; + + // Use enough keys so that changing bits / key by 1 is guaranteed to + // change number of allocated cache lines. So keys > max cache line bits. + + ResetPolicy(NewBloomFilterPolicy(8)); // num_probes = 5 + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + ASSERT_EQ(BloomHash(FilterData()), + SelectByCacheLineSize(1302145999, 2811644657U, 756553699)); + + ResetPolicy(NewBloomFilterPolicy(9)); // num_probes = 6 + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + ASSERT_EQ(BloomHash(FilterData()), + SelectByCacheLineSize(2092755149, 661139132, 1182970461)); + + ResetPolicy(NewBloomFilterPolicy(11)); // num_probes = 7 + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + ASSERT_EQ(BloomHash(FilterData()), + SelectByCacheLineSize(3755609649U, 1812694762, 1449142939)); + + ResetPolicy(NewBloomFilterPolicy(10)); // num_probes = 6 + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + ASSERT_EQ(BloomHash(FilterData()), + SelectByCacheLineSize(1478976371, 2910591341U, 1182970461)); + + 
ResetPolicy(NewBloomFilterPolicy(10)); + for (int key = 1; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + ASSERT_EQ(BloomHash(FilterData()), + SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U)); + + ResetPolicy(NewBloomFilterPolicy(10)); + for (int key = 1; key < 2088; key++) { + Add(Key(key, buffer)); + } + Build(); + ASSERT_EQ(BloomHash(FilterData()), + SelectByCacheLineSize(2885052954U, 769447944, 4175124908U)); + + ResetPolicy(); +} + } // namespace rocksdb int main(int argc, char** argv) { -- GitLab