diff --git a/db/db_sst_test.cc b/db/db_sst_test.cc index fd50abfb57441ccfcc96d48770aed1474b5451dc..644b765ef40842e0a61f5be6c40e2fd6b5c6796c 100644 --- a/db/db_sst_test.cc +++ b/db/db_sst_test.cc @@ -337,6 +337,7 @@ TEST_F(DBSSTTest, RateLimitedDelete) { ASSERT_OK(s); options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec); auto sfm = static_cast(options.sst_file_manager.get()); + sfm->delete_scheduler()->TEST_SetMaxTrashDBRatio(1.1); ASSERT_OK(TryReopen(options)); // Create 4 files in L0 @@ -402,6 +403,7 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) { env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s)); ASSERT_OK(s); auto sfm = static_cast(options.sst_file_manager.get()); + sfm->delete_scheduler()->TEST_SetMaxTrashDBRatio(1.1); DestroyAndReopen(options); @@ -456,9 +458,14 @@ TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) { [&](void* arg) { bg_delete_file++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + Status s; Options options = CurrentOptions(); options.disable_auto_compactions = true; options.env = env_; + std::string trash_dir = test::TmpDir(env_) + "/trash"; + options.sst_file_manager.reset( + NewSstFileManager(env_, nullptr, trash_dir, 0, false, &s)); + ASSERT_OK(s); DestroyAndReopen(options); // Create 4 files in L0 @@ -471,15 +478,12 @@ TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) { // Close DB and destroy it using DeleteScheduler Close(); - std::string trash_dir = test::TmpDir(env_) + "/trash"; - int64_t rate_bytes_per_sec = 1024 * 1024; // 1 Mb / Sec - Status s; - options.sst_file_manager.reset(NewSstFileManager( - env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s)); - ASSERT_OK(s); - ASSERT_OK(DestroyDB(dbname_, options)); auto sfm = static_cast(options.sst_file_manager.get()); + + sfm->SetDeleteRateBytesPerSecond(1024 * 1024); + sfm->delete_scheduler()->TEST_SetMaxTrashDBRatio(1.1); + ASSERT_OK(DestroyDB(dbname_, options)); sfm->WaitForEmptyTrash(); // We have deleted the 4 sst files in the delete_scheduler ASSERT_EQ(bg_delete_file, 4); diff --git a/util/delete_scheduler.cc b/util/delete_scheduler.cc index 5165383f41fd5d785563532ebe79b2400b9ad998..8ff979d7d281438dbe2bcf1f0b387557d9d526c5 100644 --- a/util/delete_scheduler.cc +++ b/util/delete_scheduler.cc @@ -26,12 +26,14 @@ DeleteScheduler::DeleteScheduler(Env* env, const std::string& trash_dir, SstFileManagerImpl* sst_file_manager) : env_(env), trash_dir_(trash_dir), + total_trash_size_(0), rate_bytes_per_sec_(rate_bytes_per_sec), pending_files_(0), closing_(false), cv_(&mu_), info_log_(info_log), sst_file_manager_(sst_file_manager) { + assert(sst_file_manager != nullptr); bg_thread_.reset( new port::Thread(&DeleteScheduler::BackgroundEmptyTrash, this)); } @@ -49,11 +51,14 @@ DeleteScheduler::~DeleteScheduler() { Status DeleteScheduler::DeleteFile(const std::string& file_path) { Status s; - if (rate_bytes_per_sec_.load() <= 0) { - // Rate limiting is disabled + if (rate_bytes_per_sec_.load() <= 0 || + total_trash_size_.load() > + sst_file_manager_->GetTotalSize() * max_trash_db_ratio_) { + // Rate limiting is disabled or trash size makes up more than + // max_trash_db_ratio_ (default 25%) of the total DB size TEST_SYNC_POINT("DeleteScheduler::DeleteFile"); s = env_->DeleteFile(file_path); - if (s.ok() && sst_file_manager_) { + if (s.ok()) { sst_file_manager_->OnDeleteFile(file_path); } return s; @@ -66,7 +71,7 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path) { ROCKS_LOG_ERROR(info_log_, "Failed to move %s to trash directory (%s)", file_path.c_str(), trash_dir_.c_str()); s = env_->DeleteFile(file_path); - if (s.ok() && sst_file_manager_) { + if (s.ok()) { sst_file_manager_->OnDeleteFile(file_path); } return s; @@ -123,8 +128,10 @@ Status DeleteScheduler::MoveToTrash(const std::string& file_path, break; } } - if (s.ok() && sst_file_manager_) { - sst_file_manager_->OnMoveFile(file_path, *path_in_trash); + if (s.ok()) { + uint64_t trash_file_size = 0; + sst_file_manager_->OnMoveFile(file_path, *path_in_trash, &trash_file_size); + total_trash_size_.fetch_add(trash_file_size); } return s; } @@ -210,9 +217,8 @@ Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash, *deleted_bytes = 0; } else { *deleted_bytes = file_size; - if (sst_file_manager_) { - sst_file_manager_->OnDeleteFile(path_in_trash); - } + total_trash_size_.fetch_sub(file_size); + sst_file_manager_->OnDeleteFile(path_in_trash); } return s; diff --git a/util/delete_scheduler.h b/util/delete_scheduler.h index ec29f4cdc843246a8e75fbd333d527401d951a09..ffc770a2d77f74fef57135d96302b717b2cce9ef 100644 --- a/util/delete_scheduler.h +++ b/util/delete_scheduler.h @@ -59,6 +59,13 @@ class DeleteScheduler { // file_path => error status std::map GetBackgroundErrors(); + uint64_t GetTotalTrashSize() { return total_trash_size_.load(); } + + void TEST_SetMaxTrashDBRatio(double r) { + assert(r >= 0); + max_trash_db_ratio_ = r; + } + private: Status MoveToTrash(const std::string& file_path, std::string* path_in_trash); @@ -70,6 +77,8 @@ class DeleteScheduler { Env* env_; // Path to the trash directory std::string trash_dir_; + // total size of trash directory + std::atomic total_trash_size_; // Maximum number of bytes that should be deleted per second std::atomic rate_bytes_per_sec_; // Mutex to protect queue_, pending_files_, bg_errors_, closing_ @@ -93,6 +102,9 @@ class DeleteScheduler { InstrumentedMutex file_move_mu_; Logger* info_log_; SstFileManagerImpl* sst_file_manager_; + // If the trash size constitutes for more than 25% of the total DB size + // we will start deleting new files passed to DeleteScheduler immediately + double max_trash_db_ratio_ = 0.25; static const uint64_t kMicrosInSecond = 1000 * 1000LL; }; diff --git a/util/delete_scheduler_test.cc b/util/delete_scheduler_test.cc index 879651f3d783dd95e3443676f25d29ad034cdbb4..071ceea9dbaa266bcda89c2711fe8c27c4613c86 100644 --- a/util/delete_scheduler_test.cc +++ b/util/delete_scheduler_test.cc @@ -17,6 +17,7 @@ #include "rocksdb/env.h" #include "rocksdb/options.h" #include "util/delete_scheduler.h" +#include "util/sst_file_manager_impl.h" #include "util/string_util.h" #include "util/sync_point.h" #include "util/testharness.h" @@ -61,20 +62,26 @@ class DeleteSchedulerTest : public testing::Test { std::string data(size, 'A'); EXPECT_OK(f->Append(data)); EXPECT_OK(f->Close()); + sst_file_mgr_->OnAddFile(file_path); return file_path; } void NewDeleteScheduler() { ASSERT_OK(env_->CreateDirIfMissing(trash_dir_)); - delete_scheduler_.reset(new DeleteScheduler( - env_, trash_dir_, rate_bytes_per_sec_, nullptr, nullptr)); + sst_file_mgr_.reset( + new SstFileManagerImpl(env_, nullptr, trash_dir_, rate_bytes_per_sec_)); + delete_scheduler_ = sst_file_mgr_->delete_scheduler(); + // Tests in this file are for DeleteScheduler component and dont create any + // DBs, so we need to use set this value to 100% (instead of default 25%) + delete_scheduler_->TEST_SetMaxTrashDBRatio(1.1); } Env* env_; std::string dummy_files_dir_; std::string trash_dir_; int64_t rate_bytes_per_sec_; - std::shared_ptr delete_scheduler_; + DeleteScheduler* delete_scheduler_; + std::unique_ptr sst_file_mgr_; }; // Test the basic functionality of DeleteScheduler (Rate Limiting). @@ -389,7 +396,7 @@ TEST_F(DeleteSchedulerTest, DestructorWithNonEmptyQueue) { // Deleting 100 files will need >28 hours to delete // we will delete the DeleteScheduler while delete queue is not empty - delete_scheduler_.reset(); + sst_file_mgr_.reset(); ASSERT_LT(bg_delete_file, 100); ASSERT_GT(CountFilesInDir(trash_dir_), 0); @@ -508,6 +515,42 @@ TEST_F(DeleteSchedulerTest, DISABLED_DynamicRateLimiting1) { } } +TEST_F(DeleteSchedulerTest, ImmediateDeleteOn25PercDBSize) { + int bg_delete_file = 0; + int fg_delete_file = 0; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DeleteScheduler::DeleteTrashFile:DeleteFile", + [&](void* arg) { bg_delete_file++; }); + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DeleteScheduler::DeleteFile", [&](void* arg) { fg_delete_file++; }); + + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + int num_files = 100; // 100 files + uint64_t file_size = 1024 * 10; // 100 KB as a file size + rate_bytes_per_sec_ = 1; // 1 byte per sec (very slow trash delete) + + NewDeleteScheduler(); + delete_scheduler_->TEST_SetMaxTrashDBRatio(0.25); + + std::vector generated_files; + for (int i = 0; i < num_files; i++) { + std::string file_name = "file" + ToString(i) + ".data"; + generated_files.push_back(NewDummyFile(file_name, file_size)); + } + + for (std::string& file_name : generated_files) { + delete_scheduler_->DeleteFile(file_name); + } + + // When we end up with 24 files in trash we will start + // deleting new files immediately + ASSERT_EQ(fg_delete_file, 74); + ASSERT_EQ(CountFilesInDir(trash_dir_), 25); + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/util/sst_file_manager_impl.cc b/util/sst_file_manager_impl.cc index dffaa6c03c91fb5f163663fadca7071b01f1f988..25e97aacc5a1dab5aadd27408808aa2114531a00 100644 --- a/util/sst_file_manager_impl.cc +++ b/util/sst_file_manager_impl.cc @@ -51,9 +51,13 @@ Status SstFileManagerImpl::OnDeleteFile(const std::string& file_path) { } Status SstFileManagerImpl::OnMoveFile(const std::string& old_path, - const std::string& new_path) { + const std::string& new_path, + uint64_t* file_size) { { MutexLock l(&mu_); + if (file_size != nullptr) { + *file_size = tracked_files_[old_path]; + } OnAddFileImpl(new_path, tracked_files_[old_path]); OnDeleteFileImpl(old_path); } diff --git a/util/sst_file_manager_impl.h b/util/sst_file_manager_impl.h index 2caed72ec774da90e8612ae77e15b0127414331a..c19ba61334bd7525b38eec49269fe27904bda99c 100644 --- a/util/sst_file_manager_impl.h +++ b/util/sst_file_manager_impl.h @@ -39,7 +39,8 @@ class SstFileManagerImpl : public SstFileManager { Status OnDeleteFile(const std::string& file_path); // DB will call OnMoveFile whenever an sst file is move to a new path. - Status OnMoveFile(const std::string& old_path, const std::string& new_path); + Status OnMoveFile(const std::string& old_path, const std::string& new_path, + uint64_t* file_size = nullptr); // Update the maximum allowed space that should be used by RocksDB, if // the total size of the SST files exceeds max_allowed_space, writes to @@ -76,6 +77,8 @@ class SstFileManagerImpl : public SstFileManager { // destructor to be called. virtual void WaitForEmptyTrash(); + DeleteScheduler* delete_scheduler() { return &delete_scheduler_; } + private: // REQUIRES: mutex locked void OnAddFileImpl(const std::string& file_path, uint64_t file_size);