提交 4dafa5b2 编写于 作者: Y Yu Zhang 提交者: Facebook GitHub Bot

switch to use RocksDB UnorderedMap (#11507)

Summary:
Switch from std::unordered_map to RocksDB UnorderedMap in all the places involved in logging the user-defined timestamp size in the WAL.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11507

Test Plan:
```
make all check
```

Reviewed By: ltamasi

Differential Revision: D46448975

Pulled By: jowlyzhang

fbshipit-source-id: bdb4d56a723b697a33daaf0f856a61d49a367a99
上级 4aa52d89
......@@ -705,12 +705,12 @@ class ColumnFamilySet {
Version* dummy_version,
const ColumnFamilyOptions& options);
const std::unordered_map<uint32_t, size_t>&
GetRunningColumnFamiliesTimestampSize() const {
const UnorderedMap<uint32_t, size_t>& GetRunningColumnFamiliesTimestampSize()
const {
return running_ts_sz_;
}
const std::unordered_map<uint32_t, size_t>&
const UnorderedMap<uint32_t, size_t>&
GetColumnFamiliesTimestampSizeForRecord() const {
return ts_sz_for_record_;
}
......@@ -744,10 +744,10 @@ class ColumnFamilySet {
// the same requirements as `column_families_` and `column_family_data_`.
// Mapping from column family id to user-defined timestamp size for all
// running column families.
std::unordered_map<uint32_t, size_t> running_ts_sz_;
UnorderedMap<uint32_t, size_t> running_ts_sz_;
// Mapping from column family id to user-defined timestamp size for
// column families with non-zero user-defined timestamp size.
std::unordered_map<uint32_t, size_t> ts_sz_for_record_;
UnorderedMap<uint32_t, size_t> ts_sz_for_record_;
uint32_t max_column_family_;
const FileOptions file_options_;
......
......@@ -1187,7 +1187,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
std::string scratch;
Slice record;
const std::unordered_map<uint32_t, size_t>& running_ts_sz =
const UnorderedMap<uint32_t, size_t>& running_ts_sz =
versions_->GetRunningColumnFamiliesTimestampSize();
TEST_SYNC_POINT_CALLBACK("DBImpl::RecoverLogFiles:BeforeReadWal",
......@@ -1213,7 +1213,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
return status;
}
const std::unordered_map<uint32_t, size_t>& record_ts_sz =
const UnorderedMap<uint32_t, size_t>& record_ts_sz =
reader.GetRecordedTimestampSize();
// TODO(yuzhangyu): update mode to kReconcileInconsistency when user
// comparator can be changed.
......
......@@ -199,7 +199,7 @@ Status DBImplSecondary::RecoverLogFiles(
assert(reader != nullptr);
}
const std::unordered_map<uint32_t, size_t>& running_ts_sz =
const UnorderedMap<uint32_t, size_t>& running_ts_sz =
versions_->GetRunningColumnFamiliesTimestampSize();
for (auto log_number : log_numbers) {
auto it = log_readers_.find(log_number);
......@@ -228,7 +228,7 @@ Status DBImplSecondary::RecoverLogFiles(
if (!status.ok()) {
break;
}
const std::unordered_map<uint32_t, size_t>& record_ts_sz =
const UnorderedMap<uint32_t, size_t>& record_ts_sz =
reader->GetRecordedTimestampSize();
status = HandleWriteBatchTimestampSizeDifference(
&batch, running_ts_sz, record_ts_sz,
......
......@@ -20,6 +20,7 @@
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "util/compression.h"
#include "util/hash_containers.h"
#include "util/udt_util.h"
#include "util/xxhash.h"
......@@ -79,7 +80,7 @@ class Reader {
// Return the recorded user-defined timestamp sizes that have been read so
// far. This only applies to WAL logs.
const std::unordered_map<uint32_t, size_t>& GetRecordedTimestampSize() const {
const UnorderedMap<uint32_t, size_t>& GetRecordedTimestampSize() const {
return recorded_cf_to_ts_sz_;
}
......@@ -165,7 +166,7 @@ class Reader {
// The recorded user-defined timestamp sizes that have been read so far. This
// is only for WAL logs.
std::unordered_map<uint32_t, size_t> recorded_cf_to_ts_sz_;
UnorderedMap<uint32_t, size_t> recorded_cf_to_ts_sz_;
// Extend record types with the following special values
enum {
......
......@@ -182,9 +182,8 @@ class LogTest
Slice* get_reader_contents() { return &reader_contents_; }
void Write(
const std::string& msg,
const std::unordered_map<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
void Write(const std::string& msg,
const UnorderedMap<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
if (cf_to_ts_sz != nullptr && !cf_to_ts_sz->empty()) {
ASSERT_OK(writer_->MaybeAddUserDefinedTimestampSizeRecord(*cf_to_ts_sz));
}
......@@ -193,10 +192,9 @@ class LogTest
size_t WrittenBytes() const { return dest_contents().size(); }
std::string Read(
const WALRecoveryMode wal_recovery_mode =
WALRecoveryMode::kTolerateCorruptedTailRecords,
std::unordered_map<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
std::string Read(const WALRecoveryMode wal_recovery_mode =
WALRecoveryMode::kTolerateCorruptedTailRecords,
UnorderedMap<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
std::string scratch;
Slice record;
bool ret = false;
......@@ -270,9 +268,8 @@ class LogTest
}
void CheckRecordAndTimestampSize(
std::string record,
std::unordered_map<uint32_t, size_t>& expected_ts_sz) {
std::unordered_map<uint32_t, size_t> recorded_ts_sz;
std::string record, UnorderedMap<uint32_t, size_t>& expected_ts_sz) {
UnorderedMap<uint32_t, size_t> recorded_ts_sz;
ASSERT_EQ(record,
Read(WALRecoveryMode::
kTolerateCorruptedTailRecords /* wal_recovery_mode */,
......@@ -297,18 +294,18 @@ TEST_P(LogTest, ReadWrite) {
}
TEST_P(LogTest, ReadWriteWithTimestampSize) {
std::unordered_map<uint32_t, size_t> ts_sz_one = {
UnorderedMap<uint32_t, size_t> ts_sz_one = {
{1, sizeof(uint64_t)},
};
Write("foo", &ts_sz_one);
Write("bar");
std::unordered_map<uint32_t, size_t> ts_sz_two = {{2, sizeof(char)}};
UnorderedMap<uint32_t, size_t> ts_sz_two = {{2, sizeof(char)}};
Write("", &ts_sz_two);
Write("xxxx");
CheckRecordAndTimestampSize("foo", ts_sz_one);
CheckRecordAndTimestampSize("bar", ts_sz_one);
std::unordered_map<uint32_t, size_t> expected_ts_sz_two;
UnorderedMap<uint32_t, size_t> expected_ts_sz_two;
// User-defined timestamp size records are accumulated and applied to
// subsequent records.
expected_ts_sz_two.insert(ts_sz_one.begin(), ts_sz_one.end());
......@@ -320,10 +317,9 @@ TEST_P(LogTest, ReadWriteWithTimestampSize) {
}
TEST_P(LogTest, ReadWriteWithTimestampSizeZeroTimestampIgnored) {
std::unordered_map<uint32_t, size_t> ts_sz_one = {{1, sizeof(uint64_t)}};
UnorderedMap<uint32_t, size_t> ts_sz_one = {{1, sizeof(uint64_t)}};
Write("foo", &ts_sz_one);
std::unordered_map<uint32_t, size_t> ts_sz_two(ts_sz_one.begin(),
ts_sz_one.end());
UnorderedMap<uint32_t, size_t> ts_sz_two(ts_sz_one.begin(), ts_sz_one.end());
ts_sz_two.insert(std::make_pair(2, 0));
Write("bar", &ts_sz_two);
......@@ -749,7 +745,7 @@ TEST_P(LogTest, RecycleWithTimestampSize) {
if (!recyclable_log) {
return; // test is only valid for recycled logs
}
std::unordered_map<uint32_t, size_t> ts_sz_one = {
UnorderedMap<uint32_t, size_t> ts_sz_one = {
{1, sizeof(uint32_t)},
};
Write("foo", &ts_sz_one);
......@@ -765,7 +761,7 @@ TEST_P(LogTest, RecycleWithTimestampSize) {
std::unique_ptr<WritableFileWriter> dest_holder(new WritableFileWriter(
std::move(sink), "" /* don't care */, FileOptions()));
Writer recycle_writer(std::move(dest_holder), 123, true);
std::unordered_map<uint32_t, size_t> ts_sz_two = {
UnorderedMap<uint32_t, size_t> ts_sz_two = {
{2, sizeof(uint64_t)},
};
ASSERT_OK(recycle_writer.MaybeAddUserDefinedTimestampSizeRecord(ts_sz_two));
......@@ -1039,18 +1035,18 @@ TEST_P(CompressionLogTest, ReadWriteWithTimestampSize) {
return;
}
ASSERT_OK(SetupTestEnv());
std::unordered_map<uint32_t, size_t> ts_sz_one = {
UnorderedMap<uint32_t, size_t> ts_sz_one = {
{1, sizeof(uint64_t)},
};
Write("foo", &ts_sz_one);
Write("bar");
std::unordered_map<uint32_t, size_t> ts_sz_two = {{2, sizeof(char)}};
UnorderedMap<uint32_t, size_t> ts_sz_two = {{2, sizeof(char)}};
Write("", &ts_sz_two);
Write("xxxx");
CheckRecordAndTimestampSize("foo", ts_sz_one);
CheckRecordAndTimestampSize("bar", ts_sz_one);
std::unordered_map<uint32_t, size_t> expected_ts_sz_two;
UnorderedMap<uint32_t, size_t> expected_ts_sz_two;
// User-defined timestamp size records are accumulated and applied to
// subsequent records.
expected_ts_sz_two.insert(ts_sz_one.begin(), ts_sz_one.end());
......
......@@ -197,7 +197,7 @@ IOStatus Writer::AddCompressionTypeRecord() {
}
IOStatus Writer::MaybeAddUserDefinedTimestampSizeRecord(
const std::unordered_map<uint32_t, size_t>& cf_to_ts_sz,
const UnorderedMap<uint32_t, size_t>& cf_to_ts_sz,
Env::IOPriority rate_limiter_priority) {
std::vector<std::pair<uint32_t, size_t>> ts_sz_to_record;
for (const auto& [cf_id, ts_sz] : cf_to_ts_sz) {
......
......@@ -20,6 +20,7 @@
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "util/compression.h"
#include "util/hash_containers.h"
namespace ROCKSDB_NAMESPACE {
......@@ -95,7 +96,7 @@ class Writer {
// kRecyclableUserDefinedTimestampSizeType for these column families.
// This timestamp size record applies to all subsequent records.
IOStatus MaybeAddUserDefinedTimestampSizeRecord(
const std::unordered_map<uint32_t, size_t>& cf_to_ts_sz,
const UnorderedMap<uint32_t, size_t>& cf_to_ts_sz,
Env::IOPriority rate_limiter_priority = Env::IO_TOTAL);
WritableFileWriter* file() { return dest_.get(); }
......@@ -137,7 +138,7 @@ class Writer {
// The recorded user-defined timestamp sizes that have been written so far.
// Since the user-defined timestamp size cannot be changed while the DB is
// running, an existing entry in this map cannot be updated.
std::unordered_map<uint32_t, size_t> recorded_cf_to_ts_sz_;
UnorderedMap<uint32_t, size_t> recorded_cf_to_ts_sz_;
};
} // namespace log
......
......@@ -394,7 +394,7 @@ class Repairer {
auto cf_mems = new ColumnFamilyMemTablesImpl(vset_.GetColumnFamilySet());
// Read all the records and add to a memtable
const std::unordered_map<uint32_t, size_t>& running_ts_sz =
const UnorderedMap<uint32_t, size_t>& running_ts_sz =
vset_.GetRunningColumnFamiliesTimestampSize();
std::string scratch;
Slice record;
......@@ -409,7 +409,7 @@ class Repairer {
}
Status record_status = WriteBatchInternal::SetContents(&batch, record);
if (record_status.ok()) {
const std::unordered_map<uint32_t, size_t>& record_ts_sz =
const UnorderedMap<uint32_t, size_t>& record_ts_sz =
reader.GetRecordedTimestampSize();
record_status = HandleWriteBatchTimestampSizeDifference(
&batch, running_ts_sz, record_ts_sz,
......
......@@ -1468,12 +1468,12 @@ class VersionSet {
ColumnFamilySet* GetColumnFamilySet() { return column_family_set_.get(); }
const std::unordered_map<uint32_t, size_t>&
GetRunningColumnFamiliesTimestampSize() const {
const UnorderedMap<uint32_t, size_t>& GetRunningColumnFamiliesTimestampSize()
const {
return column_family_set_->GetRunningColumnFamiliesTimestampSize();
}
const std::unordered_map<uint32_t, size_t>&
const UnorderedMap<uint32_t, size_t>&
GetColumnFamiliesTimestampSizeForRecord() const {
return column_family_set_->GetColumnFamiliesTimestampSizeForRecord();
}
......
......@@ -44,8 +44,8 @@ RecoveryType GetRecoveryType(const size_t running_ts_sz,
}
bool AllRunningColumnFamiliesConsistent(
const std::unordered_map<uint32_t, size_t>& running_ts_sz,
const std::unordered_map<uint32_t, size_t>& record_ts_sz) {
const UnorderedMap<uint32_t, size_t>& running_ts_sz,
const UnorderedMap<uint32_t, size_t>& record_ts_sz) {
for (const auto& [cf_id, ts_sz] : running_ts_sz) {
auto record_it = record_ts_sz.find(cf_id);
RecoveryType recovery_type =
......@@ -61,8 +61,8 @@ bool AllRunningColumnFamiliesConsistent(
Status CheckWriteBatchTimestampSizeConsistency(
const WriteBatch* batch,
const std::unordered_map<uint32_t, size_t>& running_ts_sz,
const std::unordered_map<uint32_t, size_t>& record_ts_sz,
const UnorderedMap<uint32_t, size_t>& running_ts_sz,
const UnorderedMap<uint32_t, size_t>& record_ts_sz,
TimestampSizeConsistencyMode check_mode, bool* ts_need_recovery) {
std::vector<uint32_t> column_family_ids;
Status status =
......@@ -103,8 +103,8 @@ Status CheckWriteBatchTimestampSizeConsistency(
} // namespace
TimestampRecoveryHandler::TimestampRecoveryHandler(
const std::unordered_map<uint32_t, size_t>& running_ts_sz,
const std::unordered_map<uint32_t, size_t>& record_ts_sz)
const UnorderedMap<uint32_t, size_t>& running_ts_sz,
const UnorderedMap<uint32_t, size_t>& record_ts_sz)
: running_ts_sz_(running_ts_sz),
record_ts_sz_(record_ts_sz),
new_batch_(new WriteBatch()),
......@@ -234,8 +234,8 @@ Status TimestampRecoveryHandler::ReconcileTimestampDiscrepancy(
Status HandleWriteBatchTimestampSizeDifference(
const WriteBatch* batch,
const std::unordered_map<uint32_t, size_t>& running_ts_sz,
const std::unordered_map<uint32_t, size_t>& record_ts_sz,
const UnorderedMap<uint32_t, size_t>& running_ts_sz,
const UnorderedMap<uint32_t, size_t>& record_ts_sz,
TimestampSizeConsistencyMode check_mode,
std::unique_ptr<WriteBatch>* new_batch) {
// Quick path to bypass checking the WriteBatch.
......
......@@ -16,6 +16,7 @@
#include "rocksdb/status.h"
#include "rocksdb/write_batch.h"
#include "util/coding.h"
#include "util/hash_containers.h"
namespace ROCKSDB_NAMESPACE {
......@@ -102,9 +103,8 @@ class UserDefinedTimestampSizeRecord {
// but not equal, return Status::InvalidArgument.
class TimestampRecoveryHandler : public WriteBatch::Handler {
public:
TimestampRecoveryHandler(
const std::unordered_map<uint32_t, size_t>& running_ts_sz,
const std::unordered_map<uint32_t, size_t>& record_ts_sz);
TimestampRecoveryHandler(const UnorderedMap<uint32_t, size_t>& running_ts_sz,
const UnorderedMap<uint32_t, size_t>& record_ts_sz);
~TimestampRecoveryHandler() override {}
......@@ -155,11 +155,11 @@ class TimestampRecoveryHandler : public WriteBatch::Handler {
// Mapping from column family id to user-defined timestamp size for all
// running column families including the ones with zero timestamp size.
const std::unordered_map<uint32_t, size_t>& running_ts_sz_;
const UnorderedMap<uint32_t, size_t>& running_ts_sz_;
// Mapping from column family id to user-defined timestamp size as recorded
// in the WAL. This only contains non-zero user-defined timestamp size.
const std::unordered_map<uint32_t, size_t>& record_ts_sz_;
const UnorderedMap<uint32_t, size_t>& record_ts_sz_;
std::unique_ptr<WriteBatch> new_batch_;
// Handler is valid upon creation and becomes invalid after its `new_batch_`
......@@ -211,8 +211,8 @@ enum class TimestampSizeConsistencyMode {
// families including the ones with zero timestamp size.
Status HandleWriteBatchTimestampSizeDifference(
const WriteBatch* batch,
const std::unordered_map<uint32_t, size_t>& running_ts_sz,
const std::unordered_map<uint32_t, size_t>& record_ts_sz,
const UnorderedMap<uint32_t, size_t>& running_ts_sz,
const UnorderedMap<uint32_t, size_t>& record_ts_sz,
TimestampSizeConsistencyMode check_mode,
std::unique_ptr<WriteBatch>* new_batch = nullptr);
} // namespace ROCKSDB_NAMESPACE
......@@ -104,9 +104,8 @@ class HandleTimestampSizeDifferenceTest : public testing::Test {
}
}
void CreateWriteBatch(
const std::unordered_map<uint32_t, size_t>& ts_sz_for_batch,
WriteBatch* batch) {
void CreateWriteBatch(const UnorderedMap<uint32_t, size_t>& ts_sz_for_batch,
WriteBatch* batch) {
for (const auto& [cf_id, ts_sz] : ts_sz_for_batch) {
std::string key;
CreateKey(&key, ts_sz);
......@@ -185,9 +184,9 @@ class HandleTimestampSizeDifferenceTest : public testing::Test {
};
TEST_F(HandleTimestampSizeDifferenceTest, AllColumnFamiliesConsistent) {
std::unordered_map<uint32_t, size_t> running_ts_sz = {{1, sizeof(uint64_t)},
{2, 0}};
std::unordered_map<uint32_t, size_t> record_ts_sz = {{1, sizeof(uint64_t)}};
UnorderedMap<uint32_t, size_t> running_ts_sz = {{1, sizeof(uint64_t)},
{2, 0}};
UnorderedMap<uint32_t, size_t> record_ts_sz = {{1, sizeof(uint64_t)}};
WriteBatch batch;
CreateWriteBatch(running_ts_sz, &batch);
......@@ -204,9 +203,9 @@ TEST_F(HandleTimestampSizeDifferenceTest, AllColumnFamiliesConsistent) {
TEST_F(HandleTimestampSizeDifferenceTest,
AllInconsistentColumnFamiliesDropped) {
std::unordered_map<uint32_t, size_t> running_ts_sz = {{2, 0}};
std::unordered_map<uint32_t, size_t> record_ts_sz = {{1, sizeof(uint64_t)},
{3, sizeof(char)}};
UnorderedMap<uint32_t, size_t> running_ts_sz = {{2, 0}};
UnorderedMap<uint32_t, size_t> record_ts_sz = {{1, sizeof(uint64_t)},
{3, sizeof(char)}};
WriteBatch batch;
CreateWriteBatch(record_ts_sz, &batch);
......@@ -222,9 +221,9 @@ TEST_F(HandleTimestampSizeDifferenceTest,
}
TEST_F(HandleTimestampSizeDifferenceTest, InvolvedColumnFamiliesConsistent) {
std::unordered_map<uint32_t, size_t> running_ts_sz = {{1, sizeof(uint64_t)},
{2, sizeof(char)}};
std::unordered_map<uint32_t, size_t> record_ts_sz = {{1, sizeof(uint64_t)}};
UnorderedMap<uint32_t, size_t> running_ts_sz = {{1, sizeof(uint64_t)},
{2, sizeof(char)}};
UnorderedMap<uint32_t, size_t> record_ts_sz = {{1, sizeof(uint64_t)}};
WriteBatch batch;
CreateWriteBatch(record_ts_sz, &batch);
......@@ -241,9 +240,8 @@ TEST_F(HandleTimestampSizeDifferenceTest, InvolvedColumnFamiliesConsistent) {
TEST_F(HandleTimestampSizeDifferenceTest,
InconsistentColumnFamilyNeedsTimestampStripping) {
std::unordered_map<uint32_t, size_t> running_ts_sz = {{1, 0},
{2, sizeof(char)}};
std::unordered_map<uint32_t, size_t> record_ts_sz = {{1, sizeof(uint64_t)}};
UnorderedMap<uint32_t, size_t> running_ts_sz = {{1, 0}, {2, sizeof(char)}};
UnorderedMap<uint32_t, size_t> record_ts_sz = {{1, sizeof(uint64_t)}};
WriteBatch batch;
CreateWriteBatch(record_ts_sz, &batch);
......@@ -265,10 +263,10 @@ TEST_F(HandleTimestampSizeDifferenceTest,
TEST_F(HandleTimestampSizeDifferenceTest,
InconsistentColumnFamilyNeedsTimestampPadding) {
std::unordered_map<uint32_t, size_t> running_ts_sz = {{1, sizeof(uint64_t)}};
UnorderedMap<uint32_t, size_t> running_ts_sz = {{1, sizeof(uint64_t)}};
// Make `record_ts_sz` not contain zero timestamp size entries to follow the
// behavior of actual WAL log timestamp size record.
std::unordered_map<uint32_t, size_t> record_ts_sz;
UnorderedMap<uint32_t, size_t> record_ts_sz;
WriteBatch batch;
CreateWriteBatch({{1, 0}}, &batch);
......@@ -289,9 +287,9 @@ TEST_F(HandleTimestampSizeDifferenceTest,
TEST_F(HandleTimestampSizeDifferenceTest,
InconsistencyReconcileCopyOverDroppedColumnFamily) {
std::unordered_map<uint32_t, size_t> running_ts_sz = {{1, 0}};
std::unordered_map<uint32_t, size_t> record_ts_sz = {{1, sizeof(uint64_t)},
{2, sizeof(char)}};
UnorderedMap<uint32_t, size_t> running_ts_sz = {{1, 0}};
UnorderedMap<uint32_t, size_t> record_ts_sz = {{1, sizeof(uint64_t)},
{2, sizeof(char)}};
WriteBatch batch;
CreateWriteBatch(record_ts_sz, &batch);
std::unique_ptr<WriteBatch> new_batch(nullptr);
......@@ -308,8 +306,8 @@ TEST_F(HandleTimestampSizeDifferenceTest,
}
TEST_F(HandleTimestampSizeDifferenceTest, UnrecoverableInconsistency) {
std::unordered_map<uint32_t, size_t> running_ts_sz = {{1, sizeof(char)}};
std::unordered_map<uint32_t, size_t> record_ts_sz = {{1, sizeof(uint64_t)}};
UnorderedMap<uint32_t, size_t> running_ts_sz = {{1, sizeof(char)}};
UnorderedMap<uint32_t, size_t> record_ts_sz = {{1, sizeof(uint64_t)}};
WriteBatch batch;
CreateWriteBatch(record_ts_sz, &batch);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册