diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc index 1b49d554df05f718fff62649a95b2e4b50d00b20..9890a1d8f452ece2a98448bacd2db67982d15846 100644 --- a/table/block_based/filter_policy.cc +++ b/table/block_based/filter_policy.cc @@ -92,6 +92,11 @@ class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder { return num_cache_lines * 64 + /*metadata*/ 5; } + double EstimatedFpRate(size_t keys, size_t bytes) override { + return FastLocalBloomImpl::EstimatedFpRate(keys, bytes - /*metadata*/ 5, + num_probes_, /*hash bits*/ 64); + } + private: void AddAllEntries(char* data, uint32_t len) { // Simple version without prefetching: @@ -194,7 +199,7 @@ using LegacyBloomImpl = LegacyLocalityBloomImpl; class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder { public: - explicit LegacyBloomBitsBuilder(const int bits_per_key); + explicit LegacyBloomBitsBuilder(const int bits_per_key, Logger* info_log); // No Copy allowed LegacyBloomBitsBuilder(const LegacyBloomBitsBuilder&) = delete; @@ -214,10 +219,16 @@ class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder { return CalculateSpace(num_entry, &dont_care1, &dont_care2); } + double EstimatedFpRate(size_t keys, size_t bytes) override { + return LegacyBloomImpl::EstimatedFpRate(keys, bytes - /*metadata*/ 5, + num_probes_); + } + private: int bits_per_key_; int num_probes_; std::vector hash_entries_; + Logger* info_log_; // Get totalbits that optimized for cpu cache line uint32_t GetTotalBitsForLocality(uint32_t total_bits); @@ -234,9 +245,11 @@ class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder { void AddHash(uint32_t h, char* data, uint32_t num_lines, uint32_t total_bits); }; -LegacyBloomBitsBuilder::LegacyBloomBitsBuilder(const int bits_per_key) +LegacyBloomBitsBuilder::LegacyBloomBitsBuilder(const int bits_per_key, + Logger* info_log) : bits_per_key_(bits_per_key), - num_probes_(LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key_)) { + 
num_probes_(LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key_)), + info_log_(info_log) { assert(bits_per_key_); } @@ -251,14 +264,39 @@ void LegacyBloomBitsBuilder::AddKey(const Slice& key) { Slice LegacyBloomBitsBuilder::Finish(std::unique_ptr* buf) { uint32_t total_bits, num_lines; - char* data = ReserveSpace(static_cast(hash_entries_.size()), &total_bits, - &num_lines); + size_t num_entries = hash_entries_.size(); + char* data = + ReserveSpace(static_cast(num_entries), &total_bits, &num_lines); assert(data); if (total_bits != 0 && num_lines != 0) { for (auto h : hash_entries_) { AddHash(h, data, num_lines, total_bits); } + + // Check for excessive entries for 32-bit hash function + if (num_entries >= /* minimum of 3 million */ 3000000U) { + // More specifically, we can detect that the 32-bit hash function + // is causing significant increase in FP rate by comparing current + // estimated FP rate to what we would get with a normal number of + // keys at same memory ratio. + double est_fp_rate = LegacyBloomImpl::EstimatedFpRate( + num_entries, total_bits / 8, num_probes_); + double vs_fp_rate = LegacyBloomImpl::EstimatedFpRate( + 1U << 16, (1U << 16) * bits_per_key_ / 8, num_probes_); + + if (est_fp_rate >= 1.50 * vs_fp_rate) { + // For more details, see + // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter + ROCKS_LOG_WARN( + info_log_, + "Using legacy SST/BBT Bloom filter with excessive key count " + "(%.1fM @ %dbpk), causing estimated %.1fx higher filter FP rate. 
" + "Consider using new Bloom with format_version>=5, smaller SST " + "file size, or partitioned filters.", + num_entries / 1000000.0, bits_per_key_, est_fp_rate / vs_fp_rate); + } + } } // See BloomFilterPolicy::GetFilterBitsReader for metadata data[total_bits / 8] = static_cast(num_probes_); @@ -545,7 +583,8 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext( "with format_version>=5.", whole_bits_per_key_, adjective); } - return new LegacyBloomBitsBuilder(whole_bits_per_key_); + return new LegacyBloomBitsBuilder(whole_bits_per_key_, + context.info_log); } } assert(false); diff --git a/table/block_based/filter_policy_internal.h b/table/block_based/filter_policy_internal.h index 6fe344c488c62f1f87160c106508e055c7efcfba..df182b524f342963c3f63a124d6dcd7a2528e3c8 100644 --- a/table/block_based/filter_policy_internal.h +++ b/table/block_based/filter_policy_internal.h @@ -28,6 +28,11 @@ class BuiltinFilterBitsBuilder : public FilterBitsBuilder { // metadata. Passing the result to CalculateNumEntry should // return >= the num_entry passed in. virtual uint32_t CalculateSpace(const int num_entry) = 0; + + // Returns an estimate of the FP rate of the returned filter if + // `keys` keys are added and the filter returned by Finish is `bytes` + // bytes. + virtual double EstimatedFpRate(size_t keys, size_t bytes) = 0; }; // RocksDB built-in filter policy for Bloom or Bloom-like filters. diff --git a/util/bloom_impl.h b/util/bloom_impl.h index 73575b07c0252bff90f2f93feedeadd7892bad77..2a9fbaef2b750316199731f95d6023f6dbf7b2d5 100644 --- a/util/bloom_impl.h +++ b/util/bloom_impl.h @@ -10,6 +10,7 @@ #pragma once #include #include +#include #include "rocksdb/slice.h" #include "util/hash.h" @@ -20,6 +21,70 @@ namespace rocksdb { +class BloomMath { + public: + // False positive rate of a standard Bloom filter, for given ratio of + // filter memory bits to added keys, and number of probes per operation. 
+ // (The false positive rate is effectively independent of scale, assuming + // the implementation scales OK.) + static double StandardFpRate(double bits_per_key, int num_probes) { + // Standard very-good-estimate formula. See + // https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives + return std::pow(1.0 - std::exp(-num_probes / bits_per_key), num_probes); + } + + // False positive rate of a "blocked"/"sharded"/"cache-local" Bloom filter, + // for given ratio of filter memory bits to added keys, number of probes per + // operation (all within the given block or cache line size), and block or + // cache line size. + static double CacheLocalFpRate(double bits_per_key, int num_probes, + int cache_line_bits) { + double keys_per_cache_line = cache_line_bits / bits_per_key; + // A reasonable estimate is the average of the FP rates for one standard + // deviation above and below the mean bucket occupancy. See + // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#the-math + double keys_stddev = std::sqrt(keys_per_cache_line); + double crowded_fp = StandardFpRate( + cache_line_bits / (keys_per_cache_line + keys_stddev), num_probes); + double uncrowded_fp = StandardFpRate( + cache_line_bits / (keys_per_cache_line - keys_stddev), num_probes); + return (crowded_fp + uncrowded_fp) / 2; + } + + // False positive rate of querying a new item against `num_keys` items, all + // hashed to `fingerprint_bits` bits. (This assumes the fingerprint hashes + // themselves are stored losslessly. See Section 4 of + // http://www.ccs.neu.edu/home/pete/pub/bloom-filters-verification.pdf) + static double FingerprintFpRate(size_t num_keys, int fingerprint_bits) { + double inv_fingerprint_space = std::pow(0.5, fingerprint_bits); + // Base estimate assumes each key maps to a unique fingerprint. + // Could be > 1 in extreme cases. 
+ double base_estimate = num_keys * inv_fingerprint_space; + // To account for potential overlap, we choose between two formulas + if (base_estimate > 0.0001) { + // A very good formula assuming we don't construct a floating point + // number extremely close to 1. Always produces a probability < 1. + return 1.0 - std::exp(-base_estimate); + } else { + // A very good formula when base_estimate is far below 1. (Subtract + // away the integral-approximated sum that some key has same hash as + // one coming before it in a list.) + return base_estimate - (base_estimate * base_estimate * 0.5); + } + } + + // Returns the probability of either of two independent(-ish) events + // happening, given their probabilities. (This is useful for combining + // results from StandardFpRate or CacheLocalFpRate with FingerprintFpRate + // for a hash-efficient Bloom filter's FP rate. See Section 4 of + // http://www.ccs.neu.edu/home/pete/pub/bloom-filters-verification.pdf) + static double IndependentProbabilitySum(double rate1, double rate2) { + // Use formula that avoids floating point extremely close to 1 if + // rates are extremely small. + return rate1 + rate2 - (rate1 * rate2); + } +}; + // A fast, flexible, and accurate cache-local Bloom implementation with // SIMD-optimized query performance (currently using AVX2 on Intel). Write // performance and non-SIMD read are very good, benefiting from fastrange32 @@ -72,6 +137,16 @@ namespace rocksdb { // class FastLocalBloomImpl { public: + // NOTE: this has only been validated to enough accuracy for producing + // reasonable warnings / user feedback, not for making functional decisions. 
+ static double EstimatedFpRate(size_t keys, size_t bytes, int num_probes, + int hash_bits) { + return BloomMath::IndependentProbabilitySum( + BloomMath::CacheLocalFpRate(8.0 * bytes / keys, num_probes, + /*cache line bits*/ 512), + BloomMath::FingerprintFpRate(keys, hash_bits)); + } + static inline int ChooseNumProbes(int millibits_per_key) { // Since this implementation can (with AVX2) make up to 8 probes // for the same cost, we pick the most accurate num_probes, based @@ -328,6 +403,26 @@ class LegacyLocalityBloomImpl { } public: + // NOTE: this has only been validated to enough accuracy for producing + // reasonable warnings / user feedback, not for making functional decisions. + static double EstimatedFpRate(size_t keys, size_t bytes, int num_probes) { + double bits_per_key = 8.0 * bytes / keys; + double filter_rate = BloomMath::CacheLocalFpRate(bits_per_key, num_probes, + /*cache line bits*/ 512); + if (!ExtraRotates) { + // Good estimate of impact of flaw in index computation. + // Adds roughly 0.002 around 50 bits/key and 0.001 around 100 bits/key. + // The + 22 shifts it nicely to fit for lower bits/key. + filter_rate += 0.1 / (bits_per_key * 0.75 + 22); + } else { + // Not yet validated + assert(false); + } + // Always uses 32-bit hash + double fingerprint_rate = BloomMath::FingerprintFpRate(keys, 32); + return BloomMath::IndependentProbabilitySum(filter_rate, fingerprint_rate); + } + static inline void AddHash(uint32_t h, uint32_t num_lines, int num_probes, char *data, int log2_cache_line_bytes) { const int log2_cache_line_bits = log2_cache_line_bytes + 3; diff --git a/util/filter_bench.cc b/util/filter_bench.cc index 04b419dd300b0484d9722c8b6d593a360643c037..d466a469da0af37730914ae7d9741e4abb41b3f9 100644 --- a/util/filter_bench.cc +++ b/util/filter_bench.cc @@ -95,14 +95,20 @@ void _always_assert_fail(int line, const char *file, const char *expr) { #define ALWAYS_ASSERT(cond) \ ((cond) ? 
(void)0 : ::_always_assert_fail(__LINE__, __FILE__, #cond)) +#ifndef NDEBUG +// This could affect build times enough that we should not include it for +// accurate speed tests +#define PREDICT_FP_RATE +#endif + using rocksdb::Arena; using rocksdb::BlockContents; using rocksdb::BloomFilterPolicy; using rocksdb::BloomHash; +using rocksdb::BuiltinFilterBitsBuilder; using rocksdb::CachableEntry; using rocksdb::EncodeFixed32; using rocksdb::fastrange32; -using rocksdb::FilterBitsBuilder; using rocksdb::FilterBitsReader; using rocksdb::FilterBuildingContext; using rocksdb::FullFilterBlockReader; @@ -302,10 +308,13 @@ void FilterBench::Go() { std::cout << "Building..." << std::endl; - std::unique_ptr builder; + std::unique_ptr builder; size_t total_memory_used = 0; size_t total_keys_added = 0; +#ifdef PREDICT_FP_RATE + double weighted_predicted_fp_rate = 0.0; +#endif rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true); @@ -330,12 +339,17 @@ void FilterBench::Go() { info.filter_ = info.plain_table_bloom_->GetRawData(); } else { if (!builder) { - builder.reset(GetBuilder()); + builder.reset(&dynamic_cast(*GetBuilder())); } for (uint32_t i = 0; i < keys_to_add; ++i) { builder->AddKey(kms_[0].Get(filter_id, i)); } info.filter_ = builder->Finish(&info.owner_); +#ifdef PREDICT_FP_RATE + weighted_predicted_fp_rate += + keys_to_add * + builder->EstimatedFpRate(keys_to_add, info.filter_.size()); +#endif if (FLAGS_new_builder) { builder.reset(); } @@ -362,6 +376,11 @@ void FilterBench::Go() { double bpk = total_memory_used * 8.0 / total_keys_added; std::cout << "Bits/key actual: " << bpk << std::endl; +#ifdef PREDICT_FP_RATE + std::cout << "Predicted FP rate %: " + << 100.0 * (weighted_predicted_fp_rate / total_keys_added) + << std::endl; +#endif if (!FLAGS_quick && !FLAGS_best_case) { double tolerable_rate = std::pow(2.0, -(bpk - 1.0) / (1.4 + bpk / 50.0)); std::cout << "Best possible FP rate %: " << 100.0 * std::pow(2.0, -bpk)