diff --git a/db/db_test2.cc b/db/db_test2.cc index b1d328fd6f03a575c6addf0ded7e9d40cf4aaf89..cb715f0d32e9096e795061295de5183a9ddb157d 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -6309,115 +6309,118 @@ TEST_F(DBTest2, BlockBasedTablePrefixGetIndexNotFound) { #ifndef ROCKSDB_LITE TEST_F(DBTest2, AutoPrefixMode1) { - // create a DB with block prefix index - BlockBasedTableOptions table_options; - Options options = CurrentOptions(); - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - options.statistics = CreateDBStatistics(); - - Reopen(options); - - Random rnd(301); - std::string large_value = rnd.RandomString(500); - - ASSERT_OK(Put("a1", large_value)); - ASSERT_OK(Put("x1", large_value)); - ASSERT_OK(Put("y1", large_value)); - ASSERT_OK(Flush()); - - ReadOptions ro; - ro.total_order_seek = false; - ro.auto_prefix_mode = true; - { - std::unique_ptr iterator(db_->NewIterator(ro)); - iterator->Seek("b1"); - ASSERT_TRUE(iterator->Valid()); - ASSERT_EQ("x1", iterator->key().ToString()); - ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); - ASSERT_OK(iterator->status()); - } + do { + // create a DB with block prefix index + Options options = CurrentOptions(); + BlockBasedTableOptions table_options = + *options.table_factory->GetOptions(); + table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.prefix_extractor.reset(NewFixedPrefixTransform(1)); + options.statistics = CreateDBStatistics(); - std::string ub_str = "b9"; - Slice ub(ub_str); - ro.iterate_upper_bound = &ub; + Reopen(options); - { - std::unique_ptr iterator(db_->NewIterator(ro)); - iterator->Seek("b1"); - ASSERT_FALSE(iterator->Valid()); - ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); - ASSERT_OK(iterator->status()); - } + Random rnd(301); + std::string large_value = rnd.RandomString(500); - ub_str = "z"; - ub = Slice(ub_str); - { - std::unique_ptr iterator(db_->NewIterator(ro)); - iterator->Seek("b1"); - ASSERT_TRUE(iterator->Valid()); - ASSERT_EQ("x1", iterator->key().ToString()); - ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); - ASSERT_OK(iterator->status()); - } + ASSERT_OK(Put("a1", large_value)); + ASSERT_OK(Put("x1", large_value)); + ASSERT_OK(Put("y1", large_value)); + ASSERT_OK(Flush()); - ub_str = "c"; - ub = Slice(ub_str); - { - std::unique_ptr iterator(db_->NewIterator(ro)); - iterator->Seek("b1"); - ASSERT_FALSE(iterator->Valid()); - ASSERT_EQ(2, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); - ASSERT_OK(iterator->status()); - } + ReadOptions ro; + ro.total_order_seek = false; + ro.auto_prefix_mode = true; + { + std::unique_ptr iterator(db_->NewIterator(ro)); + iterator->Seek("b1"); + ASSERT_TRUE(iterator->Valid()); + ASSERT_EQ("x1", iterator->key().ToString()); + ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + ASSERT_OK(iterator->status()); + } - // The same queries without recreating iterator - { - ub_str = "b9"; - ub = Slice(ub_str); + std::string ub_str = "b9"; + Slice ub(ub_str); ro.iterate_upper_bound = &ub; - std::unique_ptr iterator(db_->NewIterator(ro)); - iterator->Seek("b1"); - ASSERT_FALSE(iterator->Valid()); - ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); - ASSERT_OK(iterator->status()); + { + std::unique_ptr iterator(db_->NewIterator(ro)); + iterator->Seek("b1"); + ASSERT_FALSE(iterator->Valid()); + ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + ASSERT_OK(iterator->status()); + } ub_str = "z"; ub = Slice(ub_str); - - iterator->Seek("b1"); - ASSERT_TRUE(iterator->Valid()); - ASSERT_EQ("x1", iterator->key().ToString()); - ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + { + std::unique_ptr iterator(db_->NewIterator(ro)); + iterator->Seek("b1"); + ASSERT_TRUE(iterator->Valid()); + ASSERT_EQ("x1", iterator->key().ToString()); + ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + ASSERT_OK(iterator->status()); + } ub_str = "c"; ub = Slice(ub_str); + { + std::unique_ptr iterator(db_->NewIterator(ro)); + iterator->Seek("b1"); + ASSERT_FALSE(iterator->Valid()); + ASSERT_EQ(2, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + ASSERT_OK(iterator->status()); + } - iterator->Seek("b1"); - ASSERT_FALSE(iterator->Valid()); - ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + // The same queries without recreating iterator + { + ub_str = "b9"; + ub = Slice(ub_str); + ro.iterate_upper_bound = &ub; - ub_str = "b9"; - ub = Slice(ub_str); - ro.iterate_upper_bound = &ub; - iterator->SeekForPrev("b1"); - ASSERT_TRUE(iterator->Valid()); - ASSERT_EQ("a1", iterator->key().ToString()); - ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + std::unique_ptr iterator(db_->NewIterator(ro)); + iterator->Seek("b1"); + ASSERT_FALSE(iterator->Valid()); + ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + ASSERT_OK(iterator->status()); - ub_str = "zz"; - ub = Slice(ub_str); - ro.iterate_upper_bound = &ub; - iterator->SeekToLast(); - ASSERT_TRUE(iterator->Valid()); - ASSERT_EQ("y1", iterator->key().ToString()); + ub_str = "z"; + ub = Slice(ub_str); - iterator->SeekToFirst(); - ASSERT_TRUE(iterator->Valid()); - ASSERT_EQ("a1", iterator->key().ToString()); - } + iterator->Seek("b1"); + ASSERT_TRUE(iterator->Valid()); + ASSERT_EQ("x1", iterator->key().ToString()); + ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + + ub_str = "c"; + ub = Slice(ub_str); + + iterator->Seek("b1"); + ASSERT_FALSE(iterator->Valid()); + ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + + ub_str = "b9"; + ub = Slice(ub_str); + ro.iterate_upper_bound = &ub; + iterator->SeekForPrev("b1"); + ASSERT_TRUE(iterator->Valid()); + ASSERT_EQ("a1", iterator->key().ToString()); + ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); + + ub_str = "zz"; + ub = Slice(ub_str); + ro.iterate_upper_bound = &ub; + iterator->SeekToLast(); + ASSERT_TRUE(iterator->Valid()); + ASSERT_EQ("y1", iterator->key().ToString()); + + iterator->SeekToFirst(); + ASSERT_TRUE(iterator->Valid()); + ASSERT_EQ("a1", iterator->key().ToString()); + } + } while (ChangeOptions(kSkipPlainTable)); } class RenameCurrentTest : public DBTestBase, diff --git a/table/block_based/filter_block.h b/table/block_based/filter_block.h index 5b41202be13189b05e48b65264aa2a4305f9c52b..e767df7f1d5921980a6690b63d796e068dc3c763 100644 --- a/table/block_based/filter_block.h +++ b/table/block_based/filter_block.h @@ -188,16 +188,7 @@ class FilterBlockReader { const Slice* const const_ikey_ptr, bool* filter_checked, bool need_upper_bound_check, bool no_io, - BlockCacheLookupContext* lookup_context) { - if (need_upper_bound_check) { - return true; - } - *filter_checked = true; - Slice prefix = prefix_extractor->Transform(user_key_without_ts); - return PrefixMayMatch(prefix, prefix_extractor, kNotValid, no_io, - const_ikey_ptr, /* get_context */ nullptr, - lookup_context); - } + BlockCacheLookupContext* lookup_context) = 0; }; } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/filter_block_reader_common.cc b/table/block_based/filter_block_reader_common.cc index 135fffdf2df1c00d3107a49bb1a39b902df379ec..5795e6017791d8f21f8b734cbe6271d47e50380b 100644 --- a/table/block_based/filter_block_reader_common.cc +++ b/table/block_based/filter_block_reader_common.cc @@ -94,6 +94,64 @@ size_t FilterBlockReaderCommon::ApproximateFilterBlockMemoryUsage() : 0; } +template +bool FilterBlockReaderCommon::RangeMayExist( + const Slice* iterate_upper_bound, const Slice& user_key_without_ts, + const SliceTransform* prefix_extractor, const Comparator* comparator, + const Slice* const const_ikey_ptr, bool* filter_checked, + bool need_upper_bound_check, bool no_io, + BlockCacheLookupContext* lookup_context) { + if (!prefix_extractor || !prefix_extractor->InDomain(user_key_without_ts)) { + *filter_checked = false; + return true; + } + Slice prefix = prefix_extractor->Transform(user_key_without_ts); + if (need_upper_bound_check && + !IsFilterCompatible(iterate_upper_bound, prefix, comparator)) { + *filter_checked = false; + return true; + } else { + *filter_checked = true; + return PrefixMayMatch(prefix, prefix_extractor, kNotValid, no_io, + const_ikey_ptr, /* get_context */ nullptr, + lookup_context); + } +} + +template +bool FilterBlockReaderCommon::IsFilterCompatible( + const Slice* iterate_upper_bound, const Slice& prefix, + const Comparator* comparator) const { + // Try to reuse the bloom filter in the SST table if prefix_extractor in + // mutable_cf_options has changed. If range [user_key, upper_bound) all + // share the same prefix then we may still be able to use the bloom filter. + const SliceTransform* const prefix_extractor = table_prefix_extractor(); + if (iterate_upper_bound != nullptr && prefix_extractor) { + if (!prefix_extractor->InDomain(*iterate_upper_bound)) { + return false; + } + Slice upper_bound_xform = prefix_extractor->Transform(*iterate_upper_bound); + // first check if user_key and upper_bound all share the same prefix + if (comparator->CompareWithoutTimestamp(prefix, false, upper_bound_xform, + false) != 0) { + // second check if user_key's prefix is the immediate predecessor of + // upper_bound and have the same length. If so, we know for sure all + // keys in the range [user_key, upper_bound) share the same prefix. + // Also need to make sure upper_bound are full length to ensure + // correctness + if (!full_length_enabled_ || + iterate_upper_bound->size() != prefix_extractor_full_length_ || + !comparator->IsSameLengthImmediateSuccessor(prefix, + *iterate_upper_bound)) { + return false; + } + } + return true; + } else { + return false; + } +} + // Explicitly instantiate templates for both "blocklike" types we use. // This makes it possible to keep the template definitions in the .cc file. template class FilterBlockReaderCommon; diff --git a/table/block_based/filter_block_reader_common.h b/table/block_based/filter_block_reader_common.h index a18bc5449b4010ee26fe0df4daf0b0efa4023000..264258c8cc1887443c364947833b882b1d9aee41 100644 --- a/table/block_based/filter_block_reader_common.h +++ b/table/block_based/filter_block_reader_common.h @@ -26,8 +26,20 @@ class FilterBlockReaderCommon : public FilterBlockReader { CachableEntry&& filter_block) : table_(t), filter_block_(std::move(filter_block)) { assert(table_); + const SliceTransform* const prefix_extractor = table_prefix_extractor(); + if (prefix_extractor) { + full_length_enabled_ = + prefix_extractor->FullLengthEnabled(&prefix_extractor_full_length_); + } } + bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key, + const SliceTransform* prefix_extractor, + const Comparator* comparator, + const Slice* const const_ikey_ptr, bool* filter_checked, + bool need_upper_bound_check, bool no_io, + BlockCacheLookupContext* lookup_context) override; + protected: static Status ReadFilterBlock(const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, @@ -47,9 +59,15 @@ class FilterBlockReaderCommon : public FilterBlockReader { size_t ApproximateFilterBlockMemoryUsage() const; + private: + bool IsFilterCompatible(const Slice* iterate_upper_bound, const Slice& prefix, + const Comparator* comparator) const; + private: const BlockBasedTable* table_; CachableEntry filter_block_; + size_t prefix_extractor_full_length_ = 0; + bool full_length_enabled_ = false; }; } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/full_filter_block.cc b/table/block_based/full_filter_block.cc index 6edf76f3e2c2afe5d0a1933100948db1ecd7a6b7..1234d251f26e8866639927146c27fcfe6bbbabef 100644 --- a/table/block_based/full_filter_block.cc +++ b/table/block_based/full_filter_block.cc @@ -120,11 +120,6 @@ FullFilterBlockReader::FullFilterBlockReader( const BlockBasedTable* t, CachableEntry&& filter_block) : FilterBlockReaderCommon(t, std::move(filter_block)) { - const SliceTransform* const prefix_extractor = table_prefix_extractor(); - if (prefix_extractor) { - full_length_enabled_ = - prefix_extractor->FullLengthEnabled(&prefix_extractor_full_length_); - } } bool FullFilterBlockReader::KeyMayMatch( @@ -306,60 +301,4 @@ size_t FullFilterBlockReader::ApproximateMemoryUsage() const { return usage; } -bool FullFilterBlockReader::RangeMayExist( - const Slice* iterate_upper_bound, const Slice& user_key_without_ts, - const SliceTransform* prefix_extractor, const Comparator* comparator, - const Slice* const const_ikey_ptr, bool* filter_checked, - bool need_upper_bound_check, bool no_io, - BlockCacheLookupContext* lookup_context) { - if (!prefix_extractor || !prefix_extractor->InDomain(user_key_without_ts)) { - *filter_checked = false; - return true; - } - Slice prefix = prefix_extractor->Transform(user_key_without_ts); - if (need_upper_bound_check && - !IsFilterCompatible(iterate_upper_bound, prefix, comparator)) { - *filter_checked = false; - return true; - } else { - *filter_checked = true; - return PrefixMayMatch(prefix, prefix_extractor, kNotValid, no_io, - const_ikey_ptr, /* get_context */ nullptr, - lookup_context); - } -} - -bool FullFilterBlockReader::IsFilterCompatible( - const Slice* iterate_upper_bound, const Slice& prefix, - const Comparator* comparator) const { - // Try to reuse the bloom filter in the SST table if prefix_extractor in - // mutable_cf_options has changed. If range [user_key, upper_bound) all - // share the same prefix then we may still be able to use the bloom filter. - const SliceTransform* const prefix_extractor = table_prefix_extractor(); - if (iterate_upper_bound != nullptr && prefix_extractor) { - if (!prefix_extractor->InDomain(*iterate_upper_bound)) { - return false; - } - Slice upper_bound_xform = prefix_extractor->Transform(*iterate_upper_bound); - // first check if user_key and upper_bound all share the same prefix - if (comparator->CompareWithoutTimestamp(prefix, false, upper_bound_xform, - false) != 0) { - // second check if user_key's prefix is the immediate predecessor of - // upper_bound and have the same length. If so, we know for sure all - // keys in the range [user_key, upper_bound) share the same prefix. - // Also need to make sure upper_bound are full length to ensure - // correctness - if (!full_length_enabled_ || - iterate_upper_bound->size() != prefix_extractor_full_length_ || - !comparator->IsSameLengthImmediateSuccessor(prefix, - *iterate_upper_bound)) { - return false; - } - } - return true; - } else { - return false; - } -} - } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/full_filter_block.h b/table/block_based/full_filter_block.h index 63d407dd5f004615794d53ef0f83c421f07f5f0d..3753a1c3d6f19ce0c37ebeca6abd1f84e0f1f80e 100644 --- a/table/block_based/full_filter_block.h +++ b/table/block_based/full_filter_block.h @@ -131,25 +131,12 @@ class FullFilterBlockReader uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context) override; size_t ApproximateMemoryUsage() const override; - bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key, - const SliceTransform* prefix_extractor, - const Comparator* comparator, - const Slice* const const_ikey_ptr, bool* filter_checked, - bool need_upper_bound_check, bool no_io, - BlockCacheLookupContext* lookup_context) override; - private: bool MayMatch(const Slice& entry, bool no_io, GetContext* get_context, BlockCacheLookupContext* lookup_context) const; void MayMatch(MultiGetRange* range, bool no_io, const SliceTransform* prefix_extractor, BlockCacheLookupContext* lookup_context) const; - bool IsFilterCompatible(const Slice* iterate_upper_bound, const Slice& prefix, - const Comparator* comparator) const; - - private: - bool full_length_enabled_; - size_t prefix_extractor_full_length_; }; } // namespace ROCKSDB_NAMESPACE diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 901736b9a869bebc322e4bc378f8c61709def1e7..ce2ac6fb765f112df0f60e278924d3c6d7f35655 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -318,6 +318,8 @@ DEFINE_bool(reverse_iterator, false, "When true use Prev rather than Next for iterators that do " "Seek and then Next"); +DEFINE_bool(auto_prefix_mode, false, "Set auto_prefix_mode for seek benchmark"); + DEFINE_int64(max_scan_distance, 0, "Used to define iterate_upper_bound (or iterate_lower_bound " "if FLAGS_reverse_iterator is set to true) when value is nonzero"); @@ -2563,7 +2565,7 @@ class Benchmark { private: std::shared_ptr cache_; std::shared_ptr compressed_cache_; - const SliceTransform* prefix_extractor_; + std::shared_ptr prefix_extractor_; DBWithColumnFamilies db_; std::vector multi_dbs_; int64_t num_; @@ -2966,7 +2968,9 @@ class Benchmark { Benchmark() : cache_(NewCache(FLAGS_cache_size)), compressed_cache_(NewCache(FLAGS_compressed_cache_size)), - prefix_extractor_(NewFixedPrefixTransform(FLAGS_prefix_size)), + prefix_extractor_(FLAGS_prefix_size != 0 + ? NewFixedPrefixTransform(FLAGS_prefix_size) + : nullptr), num_(FLAGS_num), key_size_(FLAGS_key_size), user_timestamp_size_(FLAGS_user_timestamp_size), @@ -3057,7 +3061,6 @@ class Benchmark { ~Benchmark() { DeleteDBs(); - delete prefix_extractor_; if (cache_.get() != nullptr) { // Clear cache reference first open_options_.write_buffer_manager.reset(); @@ -4008,10 +4011,7 @@ class Benchmark { FLAGS_fifo_compaction_allow_compaction); options.compaction_options_fifo.age_for_warm = FLAGS_fifo_age_for_warm; #endif // ROCKSDB_LITE - if (FLAGS_prefix_size != 0) { - options.prefix_extractor.reset( - NewFixedPrefixTransform(FLAGS_prefix_size)); - } + options.prefix_extractor = prefix_extractor_; if (FLAGS_use_uint64_comparator) { options.comparator = test::Uint64Comparator(); if (FLAGS_key_size != 8) { @@ -6508,6 +6508,7 @@ class Benchmark { } } } + options.auto_prefix_mode = FLAGS_auto_prefix_mode; std::unique_ptr key_guard; Slice key = AllocateKey(&key_guard); @@ -6537,6 +6538,14 @@ class Benchmark { &upper_bound); options.iterate_upper_bound = &upper_bound; } + } else if (FLAGS_auto_prefix_mode && prefix_extractor_ && + !FLAGS_reverse_iterator) { + // Set upper bound to next prefix + auto mutable_upper_bound = const_cast(upper_bound.data()); + std::memcpy(mutable_upper_bound, key.data(), prefix_size_); + mutable_upper_bound[prefix_size_ - 1]++; + upper_bound = Slice(upper_bound.data(), prefix_size_); + options.iterate_upper_bound = &upper_bound; } // Pick a Iterator to use