提交 5040414e 编写于 作者: A Andrew Kryczka 提交者: Facebook Github Bot

Gracefully handle previous backup interrupted

Summary:
As the last step in backup creation, the .tmp directory is renamed omitting the .tmp suffix. In case the process terminates before this, the .tmp directory will be left behind. Even if this happens, we want future backups to succeed, so I added some checks/cleanup for this case.
Closes https://github.com/facebook/rocksdb/pull/1896

Differential Revision: D4597323

Pulled By: ajkr

fbshipit-source-id: 48900d8
上级 f206af56
......@@ -709,9 +709,18 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
Log(options_.info_log, "Started the backup process -- creating backup %u",
new_backup_id);
// create temporary private dir
s = backup_env_->CreateDir(
GetAbsolutePath(GetPrivateFileRel(new_backup_id, true)));
auto private_tmp_dir = GetAbsolutePath(GetPrivateFileRel(new_backup_id, true));
s = backup_env_->FileExists(private_tmp_dir);
if (s.ok()) {
// maybe last backup failed and left partial state behind, clean it up
s = GarbageCollect();
} else if (s.IsNotFound()) {
// normal case, the new backup's private dir doesn't exist yet
s = Status::OK();
}
if (s.ok()) {
s = backup_env_->CreateDir(private_tmp_dir);
}
RateLimiter* rate_limiter = options_.backup_rate_limiter.get();
if (rate_limiter) {
......
......@@ -196,6 +196,9 @@ class TestEnv : public EnvWrapper {
virtual Status DeleteFile(const std::string& fname) override {
MutexLock l(&mutex_);
if (fail_delete_files_) {
return Status::IOError();
}
EXPECT_GT(limit_delete_files_, 0U);
limit_delete_files_--;
return EnvWrapper::DeleteFile(fname);
......@@ -224,6 +227,11 @@ class TestEnv : public EnvWrapper {
limit_delete_files_ = limit;
}
void SetDeleteFileFailure(bool fail) {
MutexLock l(&mutex_);
fail_delete_files_ = fail;
}
void SetDummySequentialFile(bool dummy_sequential_file) {
MutexLock l(&mutex_);
dummy_sequential_file_ = dummy_sequential_file;
......@@ -286,6 +294,7 @@ class TestEnv : public EnvWrapper {
std::vector<std::string> filenames_for_mocked_attrs_;
uint64_t limit_written_files_ = 1000000;
uint64_t limit_delete_files_ = 1000000;
bool fail_delete_files_ = false;
bool get_children_failure_ = false;
bool create_dir_if_missing_failure_ = false;
......@@ -923,6 +932,32 @@ TEST_F(BackupableDBTest, CorruptionsTest) {
AssertBackupConsistency(2, 0, keys_iteration * 2, keys_iteration * 5);
}
TEST_F(BackupableDBTest, InterruptCreationTest) {
// Interrupt backup creation by failing new writes and failing cleanup of the
// partial state. Then verify a subsequent backup can still succeed.
const int keys_iteration = 5000;
Random rnd(6);
OpenDBAndBackupEngine(true /* destroy_old_data */);
FillDB(db_.get(), 0, keys_iteration);
test_backup_env_->SetLimitWrittenFiles(2);
test_backup_env_->SetDeleteFileFailure(true);
// should fail creation
ASSERT_FALSE(
backup_engine_->CreateNewBackup(db_.get(), !!(rnd.Next() % 2)).ok());
CloseDBAndBackupEngine();
// should also fail cleanup so the tmp directory stays behind
ASSERT_OK(backup_chroot_env_->FileExists(backupdir_ + "/private/1.tmp/"));
OpenDBAndBackupEngine(false /* destroy_old_data */);
test_backup_env_->SetLimitWrittenFiles(1000000);
test_backup_env_->SetDeleteFileFailure(false);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), !!(rnd.Next() % 2)));
// latest backup should have all the keys
CloseDBAndBackupEngine();
AssertBackupConsistency(0, 0, keys_iteration);
}
inline std::string OptionsPath(std::string ret, int backupID) {
ret += "/private/";
ret += std::to_string(backupID);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册