diff --git a/HISTORY.md b/HISTORY.md index 893d46040698c8909bdddcdf9939b00bcdcca83f..d88a0a13c75f9f82ea3eba51e4a7ef46be946ac9 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,7 @@ ### Bug Fixes * Fix wrong result being read from ingested file. May happen when a key in the file happen to be prefix of another key also in the file. The issue can further cause more data corruption. The issue exists with rocksdb >= 5.0.0 since DB::IngestExternalFile() was introduced. * Finish implementation of BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey. It's now ready for use. Significantly reduces read amplification in some setups, especially for iterator seeks. +* Fix a bug by updating CURRENT file so that it points to the correct MANIFEST file after best-efforts recovery. ### Public API Change * Add a ConfigOptions argument to the APIs dealing with converting options to and from strings and files. The ConfigOptions is meant to replace some of the options (such as input_strings_escaped and ignore_unknown_options) and allow for more parameters to be passed in the future without changing the function signature. diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 4d2619c1463c7ae195fd7760cc895d3cc8ece42d..2c9f9557c8a55abdbefc0ae67f4367e28b672475 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -1850,6 +1850,29 @@ TEST_F(DBBasicTest, RecoverWithMissingFiles) { } } +TEST_F(DBBasicTest, RecoverWithNoCurrentFile) { + Options options = CurrentOptions(); + options.env = env_; + DestroyAndReopen(options); + CreateAndReopenWithCF({"pikachu"}, options); + options.best_efforts_recovery = true; + ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options); + ASSERT_EQ(2, handles_.size()); + ASSERT_OK(Put("foo", "value")); + ASSERT_OK(Put(1, "bar", "value")); + ASSERT_OK(Flush()); + ASSERT_OK(Flush(1)); + Close(); + ASSERT_OK(env_->DeleteFile(CurrentFileName(dbname_))); + ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options); + std::vector cf_names; + ASSERT_OK(DB::ListColumnFamilies(DBOptions(options), dbname_, &cf_names)); + ASSERT_EQ(2, cf_names.size()); + for (const auto& name : cf_names) { + ASSERT_TRUE(name == kDefaultColumnFamilyName || name == "pikachu"); + } +} + TEST_F(DBBasicTest, SkipWALIfMissingTableFiles) { Options options = CurrentOptions(); DestroyAndReopen(options); diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 5f1a0dbf930d924e82e7551877fced389a8984ff..6a5bc23faf6d3b9bc77222a14dad86e7a5cb9a63 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1149,13 +1149,11 @@ class DBImpl : public DB { // REQUIRES: db mutex held when calling this function, but the db mutex can // be released and re-acquired. Db mutex will be held when the function // returns. - // Currently, this function should be called only in best-efforts recovery - // mode. // After best-efforts recovery, there may be SST files in db/cf paths that are // not referenced in the MANIFEST. We delete these SST files. In the // meantime, we find out the largest file number present in the paths, and // bump up the version set's next_file_number_ to be 1 + largest_file_number. - Status CleanupFilesAfterRecovery(); + Status FinishBestEffortsRecovery(); private: friend class DB; diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index 20381eaa92f23b4691db1eba12f5216c9189ea89..74de214633b7f7ba2eecf31668abfc73bd035efa 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -665,7 +665,7 @@ uint64_t PrecomputeMinLogNumberToKeep( return min_log_number_to_keep; } -Status DBImpl::CleanupFilesAfterRecovery() { +Status DBImpl::FinishBestEffortsRecovery() { mutex_.AssertHeld(); std::vector paths; paths.push_back(dbname_); @@ -704,8 +704,22 @@ Status DBImpl::CleanupFilesAfterRecovery() { if (largest_file_number > next_file_number) { versions_->next_file_number_.store(largest_file_number + 1); } + + VersionEdit edit; + edit.SetNextFile(versions_->next_file_number_.load()); + assert(versions_->GetColumnFamilySet()); + ColumnFamilyData* default_cfd = versions_->GetColumnFamilySet()->GetDefault(); + assert(default_cfd); + // Even if new_descriptor_log is false, we will still switch to a new + // MANIFEST and update CURRENT file, since this is in recovery. + Status s = versions_->LogAndApply( + default_cfd, *default_cfd->GetLatestMutableCFOptions(), &edit, &mutex_, + directories_.GetDbDir(), /*new_descriptor_log*/ false); + if (!s.ok()) { + return s; + } + mutex_.Unlock(); - Status s; for (const auto& fname : files_to_delete) { s = env_->DeleteFile(fname); if (!s.ok()) { diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index eee5430941dce0262d2daf0ad3fce92cfbd8e1bb..631dc7c047afe4e18d69910b68136c779d9dfa9e 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -425,7 +425,7 @@ Status DBImpl::Recover( s = versions_->TryRecover(column_families, read_only, &db_id_, &missing_table_file); if (s.ok()) { - s = CleanupFilesAfterRecovery(); + s = FinishBestEffortsRecovery(); } } if (!s.ok()) {