提交 158da7a6 编写于 作者: Y Yanqin Jin 提交者: Facebook Github Bot

Verify checksum before ingestion (#4916)

Summary:
before file ingestion (in preparation phase), verify the checksums of
the blocks of the external SST file, including properties block with global
seqno.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4916

Differential Revision: D13863501

Pulled By: riversand963

fbshipit-source-id: dc54697f970e3807832e2460f7228fcc7efe81ee
上级 d0d484b1
......@@ -6,6 +6,7 @@
* RocksDB may choose to preopen some files even if options.max_open_files != -1. This may make DB open slightly longer.
* For users of dictionary compression with ZSTD v0.7.0+, we now reuse the same digested dictionary when compressing each of an SST file's data blocks for faster compression speeds.
* For all users of dictionary compression who set `cache_index_and_filter_blocks == true`, we now store dictionary data used for decompression in the block cache for better control over memory usage. For users of ZSTD v1.1.4+ who compile with -DZSTD_STATIC_LINKING_ONLY, this includes a digested dictionary, which is used to increase decompression speed.
* Add support for block checksums verification for external SST files before ingestion.
### Public API Change
* CompactionPri = kMinOverlappingRatio also uses compensated file size, which boosts file with lots of tombstones to be compacted first.
......
此差异已折叠。
......@@ -316,6 +316,13 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
return status;
}
if (ingestion_options_.verify_checksums_before_ingest) {
status = table_reader->VerifyChecksum();
}
if (!status.ok()) {
return status;
}
// Get the external file properties
auto props = table_reader->GetTableProperties();
const auto& uprops = props->user_collected_properties;
......
......@@ -15,8 +15,9 @@
namespace rocksdb {
class ExternalSSTFileTest : public DBTestBase,
public ::testing::WithParamInterface<bool> {
class ExternalSSTFileTest
: public DBTestBase,
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
public:
ExternalSSTFileTest() : DBTestBase("/external_sst_file_test") {
sst_files_dir_ = dbname_ + "/sst_files/";
......@@ -32,7 +33,7 @@ class ExternalSSTFileTest : public DBTestBase,
const Options options,
std::vector<std::pair<std::string, std::string>> data, int file_id = -1,
bool allow_global_seqno = false, bool write_global_seqno = false,
bool sort_data = false,
bool verify_checksums_before_ingest = true, bool sort_data = false,
std::map<std::string, std::string>* true_data = nullptr,
ColumnFamilyHandle* cfh = nullptr) {
// Generate a file id if not provided
......@@ -76,6 +77,7 @@ class ExternalSSTFileTest : public DBTestBase,
IngestExternalFileOptions ifo;
ifo.allow_global_seqno = allow_global_seqno;
ifo.write_global_seqno = allow_global_seqno ? write_global_seqno : false;
ifo.verify_checksums_before_ingest = verify_checksums_before_ingest;
if (cfh) {
s = db_->IngestExternalFile(cfh, {file_path}, ifo);
} else {
......@@ -155,31 +157,32 @@ class ExternalSSTFileTest : public DBTestBase,
Status GenerateAndAddExternalFile(
const Options options, std::vector<std::pair<int, std::string>> data,
int file_id = -1, bool allow_global_seqno = false,
bool write_global_seqno = false, bool sort_data = false,
bool write_global_seqno = false,
bool verify_checksums_before_ingest = true, bool sort_data = false,
std::map<std::string, std::string>* true_data = nullptr,
ColumnFamilyHandle* cfh = nullptr) {
std::vector<std::pair<std::string, std::string>> file_data;
for (auto& entry : data) {
file_data.emplace_back(Key(entry.first), entry.second);
}
return GenerateAndAddExternalFile(options, file_data, file_id,
allow_global_seqno, write_global_seqno,
sort_data, true_data, cfh);
return GenerateAndAddExternalFile(
options, file_data, file_id, allow_global_seqno, write_global_seqno,
verify_checksums_before_ingest, sort_data, true_data, cfh);
}
Status GenerateAndAddExternalFile(
const Options options, std::vector<int> keys, int file_id = -1,
bool allow_global_seqno = false, bool write_global_seqno = false,
bool sort_data = false,
bool verify_checksums_before_ingest = true, bool sort_data = false,
std::map<std::string, std::string>* true_data = nullptr,
ColumnFamilyHandle* cfh = nullptr) {
std::vector<std::pair<std::string, std::string>> file_data;
for (auto& k : keys) {
file_data.emplace_back(Key(k), Key(k) + ToString(file_id));
}
return GenerateAndAddExternalFile(options, file_data, file_id,
allow_global_seqno, write_global_seqno,
sort_data, true_data, cfh);
return GenerateAndAddExternalFile(
options, file_data, file_id, allow_global_seqno, write_global_seqno,
verify_checksums_before_ingest, sort_data, true_data, cfh);
}
Status DeprecatedAddFile(const std::vector<std::string>& files,
......@@ -1157,13 +1160,13 @@ TEST_P(ExternalSSTFileTest, PickedLevel) {
std::map<std::string, std::string> true_data;
// File 0 will go to last level (L3)
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 10}, -1, false, false,
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 10}, -1, false, false, true,
false, &true_data));
EXPECT_EQ(FilesPerLevel(), "0,0,0,1");
// File 1 will go to level L2 (since it overlap with file 0 in L3)
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 9}, -1, false, false, false,
&true_data));
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 9}, -1, false, false, true,
false, &true_data));
EXPECT_EQ(FilesPerLevel(), "0,0,1,1");
rocksdb::SyncPoint::GetInstance()->LoadDependency({
......@@ -1192,13 +1195,13 @@ TEST_P(ExternalSSTFileTest, PickedLevel) {
// This file overlaps with file 0 (L3), file 1 (L2) and the
// output of compaction going to L1
ASSERT_OK(GenerateAndAddExternalFile(options, {4, 7}, -1, false, false, false,
&true_data));
ASSERT_OK(GenerateAndAddExternalFile(options, {4, 7}, -1, false, false, true,
false, &true_data));
EXPECT_EQ(FilesPerLevel(), "5,0,1,1");
// This file does not overlap with any file or with the running compaction
ASSERT_OK(GenerateAndAddExternalFile(options, {9000, 9001}, -1, false, false,
false, &true_data));
true, false, &true_data));
EXPECT_EQ(FilesPerLevel(), "5,0,1,2");
// Hold compaction from finishing
......@@ -1318,7 +1321,7 @@ TEST_F(ExternalSSTFileTest, IngestNonExistingFile) {
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
// After full compaction, there should be only 1 file.
std::vector<std::string> files;
env_->GetChildren(dbname_, &files);
......@@ -1429,12 +1432,12 @@ TEST_F(ExternalSSTFileTest, PickedLevelDynamic) {
// This file overlaps with the output of the compaction (going to L3)
// so the file will be added to L0 since L3 is the base level
ASSERT_OK(GenerateAndAddExternalFile(options, {31, 32, 33, 34}, -1, false,
false, false, &true_data));
false, true, false, &true_data));
EXPECT_EQ(FilesPerLevel(), "5");
// This file does not overlap with the current running compactiong
ASSERT_OK(GenerateAndAddExternalFile(options, {9000, 9001}, -1, false, false,
false, &true_data));
true, false, &true_data));
EXPECT_EQ(FilesPerLevel(), "5,0,0,1");
// Hold compaction from finishing
......@@ -1449,25 +1452,25 @@ TEST_F(ExternalSSTFileTest, PickedLevelDynamic) {
Reopen(options);
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 15, 19}, -1, false, false,
false, &true_data));
true, false, &true_data));
ASSERT_EQ(FilesPerLevel(), "1,0,0,3");
ASSERT_OK(GenerateAndAddExternalFile(options, {1000, 1001, 1002}, -1, false,
false, false, &true_data));
false, true, false, &true_data));
ASSERT_EQ(FilesPerLevel(), "1,0,0,4");
ASSERT_OK(GenerateAndAddExternalFile(options, {500, 600, 700}, -1, false,
false, false, &true_data));
false, true, false, &true_data));
ASSERT_EQ(FilesPerLevel(), "1,0,0,5");
// File 5 overlaps with file 2 (L3 / base level)
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 10}, -1, false, false,
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 10}, -1, false, false, true,
false, &true_data));
ASSERT_EQ(FilesPerLevel(), "2,0,0,5");
// File 6 overlaps with file 2 (L3 / base level) and file 5 (L0)
ASSERT_OK(GenerateAndAddExternalFile(options, {3, 9}, -1, false, false, false,
&true_data));
ASSERT_OK(GenerateAndAddExternalFile(options, {3, 9}, -1, false, false, true,
false, &true_data));
ASSERT_EQ(FilesPerLevel(), "3,0,0,5");
// Verify data in files
......@@ -1486,7 +1489,7 @@ TEST_F(ExternalSSTFileTest, PickedLevelDynamic) {
// File 7 overlaps with file 4 (L3)
ASSERT_OK(GenerateAndAddExternalFile(options, {650, 651, 652}, -1, false,
false, false, &true_data));
false, true, false, &true_data));
ASSERT_EQ(FilesPerLevel(), "5,0,0,5");
VerifyDBFromMap(true_data, &kcnt, false);
......@@ -1626,7 +1629,8 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoRandomized) {
options.level0_slowdown_writes_trigger = 256;
options.level0_stop_writes_trigger = 256;
bool write_global_seqno = GetParam();
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
for (int iter = 0; iter < 2; iter++) {
bool write_to_memtable = (iter == 0);
DestroyAndReopen(options);
......@@ -1650,9 +1654,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoRandomized) {
true_data[entry.first] = entry.second;
}
} else {
ASSERT_OK(GenerateAndAddExternalFile(options, random_data, -1, true,
write_global_seqno, true,
&true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, random_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, true, &true_data));
}
}
size_t kcnt = 0;
......@@ -1681,9 +1685,11 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
for (int i = 0; i <= 20; i++) {
file_data.emplace_back(Key(i), "L4");
}
bool write_global_seqno = GetParam();
ASSERT_OK(GenerateAndAddExternalFile(options, file_data, -1, true,
write_global_seqno, false, &true_data));
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
ASSERT_OK(GenerateAndAddExternalFile(
options, file_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
// This file dont overlap with anything in the DB, will go to L4
ASSERT_EQ("0,0,0,0,1", FilesPerLevel());
......@@ -1693,8 +1699,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
for (int i = 80; i <= 130; i++) {
file_data.emplace_back(Key(i), "L0");
}
ASSERT_OK(GenerateAndAddExternalFile(options, file_data, -1, true,
write_global_seqno, false, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, file_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
// This file overlap with the memtable, so it will flush it and add
// it self to L0
......@@ -1705,8 +1712,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
for (int i = 30; i <= 50; i++) {
file_data.emplace_back(Key(i), "L4");
}
ASSERT_OK(GenerateAndAddExternalFile(options, file_data, -1, true,
write_global_seqno, false, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, file_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
// This file dont overlap with anything in the DB and fit in L4 as well
ASSERT_EQ("2,0,0,0,2", FilesPerLevel());
......@@ -1716,8 +1724,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
for (int i = 10; i <= 40; i++) {
file_data.emplace_back(Key(i), "L3");
}
ASSERT_OK(GenerateAndAddExternalFile(options, file_data, -1, true,
write_global_seqno, false, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, file_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
// This file overlap with files in L4, we will ingest it in L3
ASSERT_EQ("2,0,0,1,2", FilesPerLevel());
......@@ -1740,17 +1749,20 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoMemtableFlush) {
&entries_in_memtable);
ASSERT_GE(entries_in_memtable, 1);
bool write_global_seqno = GetParam();
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
// No need for flush
ASSERT_OK(GenerateAndAddExternalFile(options, {90, 100, 110}, -1, true,
write_global_seqno, false, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {90, 100, 110}, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
&entries_in_memtable);
ASSERT_GE(entries_in_memtable, 1);
// This file will flush the memtable
ASSERT_OK(GenerateAndAddExternalFile(options, {19, 20, 21}, -1, true,
write_global_seqno, false, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {19, 20, 21}, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
&entries_in_memtable);
ASSERT_EQ(entries_in_memtable, 0);
......@@ -1764,15 +1776,17 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoMemtableFlush) {
ASSERT_GE(entries_in_memtable, 1);
// No need for flush, this file keys fit between the memtable keys
ASSERT_OK(GenerateAndAddExternalFile(options, {202, 203, 204}, -1, true,
write_global_seqno, false, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {202, 203, 204}, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
&entries_in_memtable);
ASSERT_GE(entries_in_memtable, 1);
// This file will flush the memtable
ASSERT_OK(GenerateAndAddExternalFile(options, {206, 207}, -1, true, false,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {206, 207}, -1, true, false, write_global_seqno,
verify_checksums_before_ingest, &true_data));
db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
&entries_in_memtable);
ASSERT_EQ(entries_in_memtable, 0);
......@@ -1790,13 +1804,16 @@ TEST_P(ExternalSSTFileTest, L0SortingIssue) {
ASSERT_OK(Put(Key(1), "memtable"));
ASSERT_OK(Put(Key(10), "memtable"));
bool write_global_seqno = GetParam();
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
// No Flush needed, No global seqno needed, Ingest in L1
ASSERT_OK(GenerateAndAddExternalFile(options, {7, 8}, -1, true,
write_global_seqno, false));
write_global_seqno,
verify_checksums_before_ingest, false));
// No Flush needed, but need a global seqno, Ingest in L0
ASSERT_OK(GenerateAndAddExternalFile(options, {7, 8}, -1, true,
write_global_seqno, false));
write_global_seqno,
verify_checksums_before_ingest, false));
printf("%s\n", FilesPerLevel().c_str());
// Overwrite what we added using external files
......@@ -2032,11 +2049,12 @@ TEST_P(ExternalSSTFileTest, IngestionListener) {
options.listeners.emplace_back(listener);
CreateAndReopenWithCF({"koko", "toto"}, options);
bool write_global_seqno = GetParam();
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
// Ingest into default cf
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2}, -1, true,
write_global_seqno, true, nullptr,
handles_[0]));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 2}, -1, true, write_global_seqno,
verify_checksums_before_ingest, true, nullptr, handles_[0]));
ASSERT_EQ(listener->ingested_files.size(), 1);
ASSERT_EQ(listener->ingested_files.back().cf_name, "default");
ASSERT_EQ(listener->ingested_files.back().global_seqno, 0);
......@@ -2046,9 +2064,9 @@ TEST_P(ExternalSSTFileTest, IngestionListener) {
"default");
// Ingest into cf1
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2}, -1, true,
write_global_seqno, true, nullptr,
handles_[1]));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 2}, -1, true, write_global_seqno,
verify_checksums_before_ingest, true, nullptr, handles_[1]));
ASSERT_EQ(listener->ingested_files.size(), 2);
ASSERT_EQ(listener->ingested_files.back().cf_name, "koko");
ASSERT_EQ(listener->ingested_files.back().global_seqno, 0);
......@@ -2058,9 +2076,9 @@ TEST_P(ExternalSSTFileTest, IngestionListener) {
"koko");
// Ingest into cf2
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2}, -1, true,
write_global_seqno, true, nullptr,
handles_[2]));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 2}, -1, true, write_global_seqno,
verify_checksums_before_ingest, true, nullptr, handles_[2]));
ASSERT_EQ(listener->ingested_files.size(), 3);
ASSERT_EQ(listener->ingested_files.back().cf_name, "toto");
ASSERT_EQ(listener->ingested_files.back().global_seqno, 0);
......@@ -2126,7 +2144,8 @@ TEST_P(ExternalSSTFileTest, IngestBehind) {
IngestExternalFileOptions ifo;
ifo.allow_global_seqno = true;
ifo.ingest_behind = true;
ifo.write_global_seqno = GetParam();
ifo.write_global_seqno = std::get<0>(GetParam());
ifo.verify_checksums_before_ingest = std::get<1>(GetParam());
// Can't ingest behind since allow_ingest_behind isn't set to true
ASSERT_NOK(GenerateAndAddExternalFileIngestBehind(options, ifo,
......@@ -2215,7 +2234,10 @@ TEST_F(ExternalSSTFileTest, SkipBloomFilter) {
}
INSTANTIATE_TEST_CASE_P(ExternalSSTFileTest, ExternalSSTFileTest,
testing::Bool());
testing::Values(std::make_tuple(false, false),
std::make_tuple(false, true),
std::make_tuple(true, false),
std::make_tuple(true, true)));
} // namespace rocksdb
......
......@@ -295,7 +295,7 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
std::vector<DbPath> cf_paths;
// Compaction concurrent thread limiter for the column family.
// If non-nullptr, use given concurrent thread limiter to control
// If non-nullptr, use given concurrent thread limiter to control
// the max outstanding compaction tasks. Limiter can be shared with
// multiple column families across db instances.
//
......@@ -1344,6 +1344,11 @@ struct IngestExternalFileOptions {
// 2. Without writing external SST file, it's possible to do checksum.
// We have a plan to set this option to false by default in the future.
bool write_global_seqno = true;
// Set to true if you would like to verify the checksums of each block of the
// external SST file before ingestion.
// Warning: setting this to true causes slowdown in file ingestion because
// the external SST file has to be read.
bool verify_checksums_before_ingest = false;
};
// TraceOptions is used for StartTrace
......
......@@ -634,6 +634,9 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
}
assert(r->status.ok());
TEST_SYNC_POINT_CALLBACK(
"BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum",
static_cast<char*>(trailer));
r->status = r->file->Append(Slice(trailer, kBlockTrailerSize));
if (r->status.ok()) {
r->status = InsertBlockInCache(block_contents, type, handle);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册