// Patch summary: introduces BlockBasedTableOptions::enable_index_compression.
//
// What the hunks below do, file by file:
//   * include/rocksdb/table.h: adds the public bool option, defaulting to
//     true.
//   * options/options_settable_test.cc: appends
//     "enable_index_compression=false" to the option string used by
//     BlockBasedTableOptionsAllFieldsSettable.
//   * table/block_based_table_builder.cc: in BlockBasedTableBuilder::Finish(),
//     index blocks go through WriteBlock(..., false) when the option is true
//     and through WriteRawBlock(..., kNoCompression, ...) when it is false.
//     The same branch is applied to the first index block and to every block
//     written inside the `while (s.IsIncomplete())` loop.
//   * table/block_based_table_factory.cc: GetPrintableTableOptions() prints
//     the new option with "%d".
//   * table/block_based_table_factory.h: registers "enable_index_compression"
//     in the option map as OptionType::kBoolean.
//   * table/table_test.cc: adds BlockBasedTableTest::IndexUncompressedHelper,
//     which builds a 10000-key table with kSnappyCompression and returns the
//     NUMBER_BLOCK_COMPRESSED ticker, plus the IndexUncompressed test, which
//     expects exactly one more compressed block with the option on than off.
//     BasicBlockBasedTableProperties additionally enables statistics and
//     asserts NUMBER_BLOCK_NOT_COMPRESSED is 0.
//   * tools/db_bench_tool.cc: adds the --enable_index_compression flag and
//     copies it into block_based_options.
//
// NOTE(review): IndexUncompressedHelper is declared with a parameter named
//   `indexCompress` but defined with one named `compressed`; harmless to the
//   compiler, but worth making consistent.
// NOTE(review): the named `!is_data_block` argument is replaced by a bare
//   `false` in the WriteBlock calls; an inline argument comment would keep the
//   intent readable.
// NOTE(review): IndexUncompressed presumably needs Snappy support compiled in
//   for the ticker counts to differ -- confirm against the build configs.
// NOTE(review): in this copy of the patch the original line breaks are
//   collapsed and template arguments appear to have been stripped (e.g.
//   `std::vector keys;`); presumably an extraction artifact -- verify against
//   the upstream commit before applying.
diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index d3cf74d1edbb1a9240086f954cdcd24e216e6f3b..8703a0eb584c1046e4e6414323e9ef375b5b1c38 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -217,6 +217,11 @@ struct BlockBasedTableOptions { // This option only affects newly written tables. When reading exising tables, // the information about version is read from the footer. uint32_t format_version = 2; + + // Store index blocks on disk in compressed format. Changing this option to + // false will avoid the overhead of decompression if index blocks are evicted + // and read back + bool enable_index_compression = true; }; // Table Properties that are specific to block-based table properties. diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 34157a0ecfeb090fa0645044bdde9d3f5323bf7e..d64473c3a38a0bbc5d6e7dbb2d3f4c99e4c8102a 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -150,7 +150,8 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) { "filter_policy=bloomfilter:4:true;whole_key_filtering=1;" "format_version=1;" "hash_index_allow_collision=false;" - "verify_compression=true;read_amp_bytes_per_bit=0", + "verify_compression=true;read_amp_bytes_per_bit=0;" + "enable_index_compression=false", new_bbto)); ASSERT_EQ(unset_bytes_base, diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index fc3e8f8296262bde3d7ac764ed8526ff8be956d1..7b030382f2879dc367d825ad63d2622a68e44115 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -783,9 +783,12 @@ Status BlockBasedTableBuilder::Finish() { WriteRawBlock(meta_index_builder.Finish(), kNoCompression, &metaindex_block_handle); - const bool is_data_block = true; - WriteBlock(index_blocks.index_block_contents, &index_block_handle, - !is_data_block); + if (r->table_options.enable_index_compression) { + 
WriteBlock(index_blocks.index_block_contents, &index_block_handle, false); + } else { + WriteRawBlock(index_blocks.index_block_contents, kNoCompression, + &index_block_handle); + } // If there are more index partitions, finish them and write them out Status& s = index_builder_status; while (s.IsIncomplete()) { @@ -793,8 +796,13 @@ Status BlockBasedTableBuilder::Finish() { if (!s.ok() && !s.IsIncomplete()) { return s; } - WriteBlock(index_blocks.index_block_contents, &index_block_handle, - !is_data_block); + if (r->table_options.enable_index_compression) { + WriteBlock(index_blocks.index_block_contents, &index_block_handle, + false); + } else { + WriteRawBlock(index_blocks.index_block_contents, kNoCompression, + &index_block_handle); + } // The last index_block_handle will be for the partition index block } } diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index 2f3b6cd84466d917afb30d7fcf2cc7c44e12a5fb..5c83b3d13aabe4421c5f1dafcd3830ee06cec97f 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -223,6 +223,9 @@ std::string BlockBasedTableFactory::GetPrintableTableOptions() const { snprintf(buffer, kBufferSize, " format_version: %d\n", table_options_.format_version); ret.append(buffer); + snprintf(buffer, kBufferSize, " enable_index_compression: %d\n", + table_options_.enable_index_compression); + ret.append(buffer); return ret; } diff --git a/table/block_based_table_factory.h b/table/block_based_table_factory.h index 39e3eac0b37b18983b541ba69e73e8470ae80815..a5eba7eff617ec2a78dadea68ea4c726bef12c84 100644 --- a/table/block_based_table_factory.h +++ b/table/block_based_table_factory.h @@ -152,6 +152,9 @@ static std::unordered_map OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}}, {"read_amp_bytes_per_bit", {offsetof(struct BlockBasedTableOptions, read_amp_bytes_per_bit), - OptionType::kSizeT, OptionVerificationType::kNormal, false, 0}}}; + OptionType::kSizeT, 
OptionVerificationType::kNormal, false, 0}}, + {"enable_index_compression", + {offsetof(struct BlockBasedTableOptions, enable_index_compression), + OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}}}; #endif // !ROCKSDB_LITE } // namespace rocksdb diff --git a/table/table_test.cc b/table/table_test.cc index 178cf4243d7f2091371cfe8a10d306fe36669b9d..a4856758202e37e5baa330ff8f45b4c8f72742d1 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -1003,7 +1003,10 @@ class TableTest : public testing::Test { }; class GeneralTableTest : public TableTest {}; -class BlockBasedTableTest : public TableTest {}; +class BlockBasedTableTest : public TableTest { + protected: + uint64_t IndexUncompressedHelper(bool indexCompress); +}; class PlainTableTest : public TableTest {}; class TablePropertyTest : public testing::Test {}; @@ -1064,13 +1067,17 @@ TEST_F(BlockBasedTableTest, BasicBlockBasedTableProperties) { stl_wrappers::KVMap kvmap; Options options; options.compression = kNoCompression; + options.statistics = CreateDBStatistics(); + options.statistics->stats_level_ = StatsLevel::kAll; BlockBasedTableOptions table_options; table_options.block_restart_interval = 1; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - const ImmutableCFOptions ioptions(options); + ImmutableCFOptions ioptions(options); + ioptions.statistics = options.statistics.get(); c.Finish(options, ioptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); + ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_NOT_COMPRESSED), 0); auto& props = *c.GetTableReader()->GetTableProperties(); ASSERT_EQ(kvmap.size(), props.num_entries); @@ -1094,6 +1101,39 @@ TEST_F(BlockBasedTableTest, BasicBlockBasedTableProperties) { c.ResetTableReader(); } +uint64_t BlockBasedTableTest::IndexUncompressedHelper(bool compressed) { + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + constexpr size_t kNumKeys = 10000; + + for 
(size_t k = 0; k < kNumKeys; ++k) { + c.Add("key" + ToString(k), "val" + ToString(k)); + } + + std::vector keys; + stl_wrappers::KVMap kvmap; + Options options; + options.compression = kSnappyCompression; + options.statistics = CreateDBStatistics(); + options.statistics->stats_level_ = StatsLevel::kAll; + BlockBasedTableOptions table_options; + table_options.block_restart_interval = 1; + table_options.enable_index_compression = compressed; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + ImmutableCFOptions ioptions(options); + ioptions.statistics = options.statistics.get(); + c.Finish(options, ioptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + c.ResetTableReader(); + return options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED); +} +TEST_F(BlockBasedTableTest, IndexUncompressed) { + uint64_t tbl1_compressed_cnt = IndexUncompressedHelper(true); + uint64_t tbl2_compressed_cnt = IndexUncompressedHelper(false); + // tbl1_compressed_cnt should include 1 index block + EXPECT_EQ(tbl2_compressed_cnt + 1, tbl1_compressed_cnt); +} + TEST_F(BlockBasedTableTest, BlockBasedTableProperties2) { TableConstructor c(&reverse_key_comparator); std::vector keys; diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 064c5ccea6d42d6b336773e383ad125606be9570..e020712db0f0787337648e9029c4304781c5180c 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -442,6 +442,10 @@ DEFINE_int32(read_amp_bytes_per_bit, rocksdb::BlockBasedTableOptions().read_amp_bytes_per_bit, "Number of bytes per bit to be used in block read-amp bitmap"); +DEFINE_bool(enable_index_compression, + rocksdb::BlockBasedTableOptions().enable_index_compression, + "Compress the index block"); + DEFINE_int64(compressed_cache_size, -1, "Number of bytes to use as a cache of compressed data."); @@ -3110,6 +3114,8 @@ void VerifyDBFromDB(std::string& truth_db_name) { block_based_options.filter_policy = filter_policy_; 
block_based_options.format_version = 2; block_based_options.read_amp_bytes_per_bit = FLAGS_read_amp_bytes_per_bit; + block_based_options.enable_index_compression = + FLAGS_enable_index_compression; if (FLAGS_read_cache_path != "") { #ifndef ROCKSDB_LITE Status rc_status;