diff --git a/db/db_test.cc b/db/db_test.cc index dfc23196962210e25129d6432476bdf864c35eae..d39cbfe294068ff8e263da896230069f20827766 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -5272,6 +5272,7 @@ class DBTestRandomized : public DBTest, option_configs.push_back(option_config); } } + option_configs.push_back(kBlockBasedTableWithIndexRestartInterval); return option_configs; } }; diff --git a/db/db_test_util.cc b/db/db_test_util.cc index f2906c7ca9208b4f4c1b4789835defb05229de4c..222dc715d3ba874fae9713db0881c4d9780a0611 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -339,6 +339,10 @@ Options DBTestBase::CurrentOptions( options.prefix_extractor.reset(NewNoopTransform()); break; } + case kBlockBasedTableWithIndexRestartInterval: { + table_options.index_block_restart_interval = 8; + break; + } case kOptimizeFiltersForHits: { options.optimize_filters_for_hits = true; set_block_based_table_factory = true; diff --git a/db/db_test_util.h b/db/db_test_util.h index b993af8cb5f7be2059d3e8b419e487e69f408e42..cde352dfeccb26c565dc4780640e16665a7fd93c 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -529,6 +529,7 @@ class DBTestBase : public testing::Test { kEnd = 31, kLevelSubcompactions = 31, kUniversalSubcompactions = 32, + kBlockBasedTableWithIndexRestartInterval = 33, }; int option_config_; diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 2e1a91de95fc3f5dd75922aebe8321063634d4f7..157d4274c01a90039b10d6b402a946003d13d5be 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -120,6 +120,9 @@ struct BlockBasedTableOptions { // value will be silently overwritten with 1. int block_restart_interval = 16; + // Same as block_restart_interval but used for the index block. + int index_block_restart_interval = 1; + // Use delta encoding to compress keys in blocks. // Iterator::PinData() requires this option to be disabled. // diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index 006908eaa59b6c8111d2c9d93b60f3be533cc83b..ee8c3dd7c0ce4d089c5f8667204e1a48d5ab5448 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -113,15 +113,17 @@ class IndexBuilder { // // Optimizations: // 1. Made block's `block_restart_interval` to be 1, which will avoid linear -// search when doing index lookup. +// search when doing index lookup (can be disabled by setting +// index_block_restart_interval). // 2. Shorten the key length for index block. Other than honestly using the // last key in the data block as the index key, we instead find a shortest // substitute key that serves the same function. class ShortenedIndexBuilder : public IndexBuilder { public: - explicit ShortenedIndexBuilder(const Comparator* comparator) + explicit ShortenedIndexBuilder(const Comparator* comparator, + int index_block_restart_interval) : IndexBuilder(comparator), - index_block_builder_(1 /* block_restart_interval == 1 */) {} + index_block_builder_(index_block_restart_interval) {} virtual void AddIndexEntry(std::string* last_key_in_current_block, const Slice* first_key_in_next_block, @@ -178,9 +180,10 @@ class ShortenedIndexBuilder : public IndexBuilder { class HashIndexBuilder : public IndexBuilder { public: explicit HashIndexBuilder(const Comparator* comparator, - const SliceTransform* hash_key_extractor) + const SliceTransform* hash_key_extractor, + int index_block_restart_interval) : IndexBuilder(comparator), - primary_index_builder_(comparator), + primary_index_builder_(comparator, index_block_restart_interval), hash_key_extractor_(hash_key_extractor) {} virtual void AddIndexEntry(std::string* last_key_in_current_block, @@ -266,13 +269,16 @@ namespace { // Create a index builder based on its type. IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator, - const SliceTransform* prefix_extractor) { + const SliceTransform* prefix_extractor, + int index_block_restart_interval) { switch (type) { case BlockBasedTableOptions::kBinarySearch: { - return new ShortenedIndexBuilder(comparator); + return new ShortenedIndexBuilder(comparator, + index_block_restart_interval); } case BlockBasedTableOptions::kHashSearch: { - return new HashIndexBuilder(comparator, prefix_extractor); + return new HashIndexBuilder(comparator, prefix_extractor, + index_block_restart_interval); } default: { assert(!"Do not recognize the index type "); @@ -484,9 +490,10 @@ struct BlockBasedTableBuilder::Rep { data_block(table_options.block_restart_interval, table_options.use_delta_encoding), internal_prefix_transform(_ioptions.prefix_extractor), - index_builder(CreateIndexBuilder(table_options.index_type, - &internal_comparator, - &this->internal_prefix_transform)), + index_builder( + CreateIndexBuilder(table_options.index_type, &internal_comparator, + &this->internal_prefix_transform, + table_options.index_block_restart_interval)), compression_type(_compression_type), compression_opts(_compression_opts), filter_block(skip_filters ? nullptr : CreateFilterBlockBuilder( diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index a6484c4ee8558a42679ac157a4513fd21d1e847e..7b38c2136f9afbb353a0c203d93093fe1e9d1276 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -42,6 +42,9 @@ BlockBasedTableFactory::BlockBasedTableFactory( if (table_options_.block_restart_interval < 1) { table_options_.block_restart_interval = 1; } + if (table_options_.index_block_restart_interval < 1) { + table_options_.index_block_restart_interval = 1; + } } Status BlockBasedTableFactory::NewTableReader( @@ -150,6 +153,9 @@ std::string BlockBasedTableFactory::GetPrintableTableOptions() const { snprintf(buffer, kBufferSize, " block_restart_interval: %d\n", table_options_.block_restart_interval); ret.append(buffer); + snprintf(buffer, kBufferSize, " index_block_restart_interval: %d\n", + table_options_.index_block_restart_interval); + ret.append(buffer); snprintf(buffer, kBufferSize, " filter_policy: %s\n", table_options_.filter_policy == nullptr ? "nullptr" : table_options_.filter_policy->Name()); diff --git a/table/table_test.cc b/table/table_test.cc index 0a84f2750e7981e9fde21e2f768e39be6dc78d2a..2bd28ca0adf225f800a43df421e90436f22b55f6 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -637,6 +637,7 @@ class HarnessTest : public testing::Test { new FlushBlockBySizePolicyFactory()); table_options_.block_size = 256; table_options_.block_restart_interval = args.restart_interval; + table_options_.index_block_restart_interval = args.restart_interval; table_options_.format_version = args.format_version; options_.table_factory.reset( new BlockBasedTableFactory(table_options_)); @@ -2282,6 +2283,67 @@ TEST_F(HarnessTest, FooterTests) { } } +class IndexBlockRestartIntervalTest + : public BlockBasedTableTest, + public ::testing::WithParamInterface { + public: + static std::vector GetRestartValues() { return {-1, 0, 1, 8, 16, 32}; } +}; + +INSTANTIATE_TEST_CASE_P( + IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest, + ::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues())); + +TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) { + const int kKeysInTable = 10000; + const int kKeySize = 100; + const int kValSize = 500; + + int index_block_restart_interval = GetParam(); + + Options options; + BlockBasedTableOptions table_options; + table_options.block_size = 64; // small block size to get big index block + table_options.index_block_restart_interval = index_block_restart_interval; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + + TableConstructor c(BytewiseComparator()); + static Random rnd(301); + for (int i = 0; i < kKeysInTable; i++) { + InternalKey k(RandomString(&rnd, kKeySize), 0, kTypeValue); + c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize)); + } + + std::vector keys; + stl_wrappers::KVMap kvmap; + std::unique_ptr comparator( + new InternalKeyComparator(BytewiseComparator())); + const ImmutableCFOptions ioptions(options); + c.Finish(options, ioptions, table_options, *comparator, &keys, &kvmap); + auto reader = c.GetTableReader(); + + std::unique_ptr db_iter(reader->NewIterator(ReadOptions())); + + // Test point lookup + for (auto& kv : kvmap) { + db_iter->Seek(kv.first); + + ASSERT_TRUE(db_iter->Valid()); + ASSERT_OK(db_iter->status()); + ASSERT_EQ(db_iter->key(), kv.first); + ASSERT_EQ(db_iter->value(), kv.second); + } + + // Test iterating + auto kv_iter = kvmap.begin(); + for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { + ASSERT_EQ(db_iter->key(), kv_iter->first); + ASSERT_EQ(db_iter->value(), kv_iter->second); + kv_iter++; + } + ASSERT_EQ(kv_iter, kvmap.end()); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/util/options_helper.h b/util/options_helper.h index 4c4555aca8b30c2e13a6b9fbc4dea7f688d539bf..b0636adc562cdbbb7c485089bd9bd9630d4fd8c0 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -486,6 +486,9 @@ static std::unordered_mapUniform(10000000); opt.block_size_deviation = rnd->Uniform(100); opt.block_restart_interval = rnd->Uniform(100); + opt.index_block_restart_interval = rnd->Uniform(100); opt.whole_key_filtering = rnd->Uniform(2); return opt;