diff --git a/HISTORY.md b/HISTORY.md index e9688249dba6f5424c7c26f41c6e90d6d1b31f79..4e9e4ca6e76d151ad24be1991107d5668c87f6fe 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -17,6 +17,7 @@ * Avoid unnecessarily flushing in `CompactRange()` when the range specified by the user does not overlap unflushed memtables. * If `ColumnFamilyOptions::max_subcompactions` is set greater than one, we now parallelize large manual level-based compactions. * Add "rocksdb.live-sst-files-size" DB property to return total bytes of all SST files belong to the latest LSM tree. +* `NewSstFileManager()` now takes an additional argument, `bytes_max_delete_chunk` (default 64MB). A trash file larger than this value is deleted in chunks by ftruncating it by this size each time instead of dropping the whole file at once; 0 means always delete the whole file. ### Bug Fixes * Fix a leak in prepared_section_completed_ where the zeroed entries would not removed from the map. diff --git a/include/rocksdb/sst_file_manager.h b/include/rocksdb/sst_file_manager.h index fa18f836c7a83005f40588b1ad6b7bee2d557988..c0e109f1eb2dd7bc95fff8ccafd8d15f331f4048 100644 --- a/include/rocksdb/sst_file_manager.h +++ b/include/rocksdb/sst_file_manager.h @@ -96,10 +96,14 @@ class SstFileManager { // @param max_trash_db_ratio: If the trash size constitutes for more than this // fraction of the total DB size we will start deleting new files passed to // DeleteScheduler immediately +// @param bytes_max_delete_chunk: if a single file is larger than this value, +// ftruncate the file by this size each time, rather than dropping the whole +// file. 0 means to always delete the whole file. 
extern SstFileManager* NewSstFileManager( Env* env, std::shared_ptr info_log = nullptr, std::string trash_dir = "", int64_t rate_bytes_per_sec = 0, bool delete_existing_trash = true, Status* status = nullptr, - double max_trash_db_ratio = 0.25); + double max_trash_db_ratio = 0.25, + uint64_t bytes_max_delete_chunk = 64 * 1024 * 1024); } // namespace rocksdb diff --git a/util/delete_scheduler.cc b/util/delete_scheduler.cc index ec7e2f4d2719c8491602ca370f0fc7f0e9fc448a..8b05a5c90b8a98350cec9a467a284b7ac7f1082a 100644 --- a/util/delete_scheduler.cc +++ b/util/delete_scheduler.cc @@ -22,11 +22,13 @@ namespace rocksdb { DeleteScheduler::DeleteScheduler(Env* env, int64_t rate_bytes_per_sec, Logger* info_log, SstFileManagerImpl* sst_file_manager, - double max_trash_db_ratio) + double max_trash_db_ratio, + uint64_t bytes_max_delete_chunk) : env_(env), total_trash_size_(0), rate_bytes_per_sec_(rate_bytes_per_sec), pending_files_(0), + bytes_max_delete_chunk_(bytes_max_delete_chunk), closing_(false), cv_(&mu_), info_log_(info_log), @@ -208,15 +210,18 @@ void DeleteScheduler::BackgroundEmptyTrash() { // Get new file to delete std::string path_in_trash = queue_.front(); - queue_.pop(); // We dont need to hold the lock while deleting the file mu_.Unlock(); uint64_t deleted_bytes = 0; + bool is_complete = true; // Delete file from trash and update total_penlty value - Status s = DeleteTrashFile(path_in_trash, &deleted_bytes); + Status s = DeleteTrashFile(path_in_trash, &deleted_bytes, &is_complete); total_deleted_bytes += deleted_bytes; mu_.Lock(); + if (is_complete) { + queue_.pop(); + } if (!s.ok()) { bg_errors_[path_in_trash] = s; @@ -236,7 +241,9 @@ void DeleteScheduler::BackgroundEmptyTrash() { TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait", &total_penlty); - pending_files_--; + if (is_complete) { + pending_files_--; + } if (pending_files_ == 0) { // Unblock WaitForEmptyTrash since there are no more files waiting // to be deleted @@ -247,23 +254,49 @@ 
void DeleteScheduler::BackgroundEmptyTrash() { } Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash, - uint64_t* deleted_bytes) { + uint64_t* deleted_bytes, + bool* is_complete) { uint64_t file_size; Status s = env_->GetFileSize(path_in_trash, &file_size); + *is_complete = true; + TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile"); if (s.ok()) { - TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile"); - s = env_->DeleteFile(path_in_trash); - } + bool need_full_delete = true; + if (bytes_max_delete_chunk_ != 0 && file_size > bytes_max_delete_chunk_) { + unique_ptr<WritableFile> wf; + Status my_status = + env_->ReopenWritableFile(path_in_trash, &wf, EnvOptions()); + if (my_status.ok()) { + my_status = wf->Truncate(file_size - bytes_max_delete_chunk_); + if (my_status.ok()) { + TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:Fsync"); + my_status = wf->Fsync(); + } + } + if (my_status.ok()) { + *deleted_bytes = bytes_max_delete_chunk_; + need_full_delete = false; + *is_complete = false; + } else { + ROCKS_LOG_WARN(info_log_, + "Failed to partially delete %s from trash -- %s", + path_in_trash.c_str(), my_status.ToString().c_str()); + } + } + if (need_full_delete) { + s = env_->DeleteFile(path_in_trash); + *deleted_bytes = file_size; + if (s.ok()) sst_file_manager_->OnDeleteFile(path_in_trash); + } + } if (!s.ok()) { // Error while getting file size or while deleting ROCKS_LOG_ERROR(info_log_, "Failed to delete %s from trash -- %s", path_in_trash.c_str(), s.ToString().c_str()); *deleted_bytes = 0; } else { - *deleted_bytes = file_size; - total_trash_size_.fetch_sub(file_size); - sst_file_manager_->OnDeleteFile(path_in_trash); + total_trash_size_.fetch_sub(*deleted_bytes); } return s; diff --git a/util/delete_scheduler.h b/util/delete_scheduler.h index c142d07a4dd7906b669527093f59034df4503709..cc456dcb9bd6bf24c65ea2665b42ffb23a9000de 100644 --- a/util/delete_scheduler.h +++ b/util/delete_scheduler.h @@ -34,7 +34,7 @@ class DeleteScheduler { public: 
DeleteScheduler(Env* env, int64_t rate_bytes_per_sec, Logger* info_log, SstFileManagerImpl* sst_file_manager, - double max_trash_db_ratio); + double max_trash_db_ratio, uint64_t bytes_max_delete_chunk); ~DeleteScheduler(); @@ -82,7 +82,7 @@ class DeleteScheduler { Status MarkAsTrash(const std::string& file_path, std::string* path_in_trash); Status DeleteTrashFile(const std::string& path_in_trash, - uint64_t* deleted_bytes); + uint64_t* deleted_bytes, bool* is_complete); void BackgroundEmptyTrash(); @@ -97,6 +97,7 @@ class DeleteScheduler { std::queue queue_; // Number of trash files that are waiting to be deleted int32_t pending_files_; + uint64_t bytes_max_delete_chunk_; // Errors that happened in BackgroundEmptyTrash (file_path => error) std::map bg_errors_; // Set to true in ~DeleteScheduler() to force BackgroundEmptyTrash to stop diff --git a/util/delete_scheduler_test.cc b/util/delete_scheduler_test.cc index 936e4d7caf075cbc31d7384f998458fd3095734f..bb8e81f7dc8ac0e3163b36e88b0f05f61976f841 100644 --- a/util/delete_scheduler_test.cc +++ b/util/delete_scheduler_test.cc @@ -99,7 +99,7 @@ class DeleteSchedulerTest : public testing::Test { // 25%) sst_file_mgr_.reset( new SstFileManagerImpl(env_, nullptr, rate_bytes_per_sec_, - /* max_trash_db_ratio= */ 1.1)); + /* max_trash_db_ratio= */ 1.1, 128 * 1024)); delete_scheduler_ = sst_file_mgr_->delete_scheduler(); } @@ -436,6 +436,34 @@ TEST_F(DeleteSchedulerTest, StartBGEmptyTrashMultipleTimes) { rocksdb::SyncPoint::GetInstance()->EnableProcessing(); } +TEST_F(DeleteSchedulerTest, DeletePartialFile) { + int bg_delete_file = 0; + int bg_fsync = 0; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DeleteScheduler::DeleteTrashFile:DeleteFile", + [&](void*) { bg_delete_file++; }); + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DeleteScheduler::DeleteTrashFile:Fsync", [&](void*) { bg_fsync++; }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / sec + 
NewDeleteScheduler(); + + // Should delete in 4 batches + ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile("data_1", 500 * 1024))); + ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile("data_2", 100 * 1024))); + // Should delete in 2 batches + ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile("data_3", 200 * 1024))); + + delete_scheduler_->WaitForEmptyTrash(); + + auto bg_errors = delete_scheduler_->GetBackgroundErrors(); + ASSERT_EQ(bg_errors.size(), 0); + ASSERT_EQ(7, bg_delete_file); + ASSERT_EQ(4, bg_fsync); + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); +} + // 1- Create a DeleteScheduler with very slow rate limit (1 Byte / sec) // 2- Delete 100 files using DeleteScheduler // 3- Delete the DeleteScheduler (call the destructor while queue is not empty) diff --git a/util/sst_file_manager_impl.cc b/util/sst_file_manager_impl.cc index 435a8f35774b1f4a5d861be7334428b65790b38b..07d27695ca0f8f8f2be4b21e223d28fa14835927 100644 --- a/util/sst_file_manager_impl.cc +++ b/util/sst_file_manager_impl.cc @@ -18,7 +18,8 @@ namespace rocksdb { #ifndef ROCKSDB_LITE SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr logger, int64_t rate_bytes_per_sec, - double max_trash_db_ratio) + double max_trash_db_ratio, + uint64_t bytes_max_delete_chunk) : env_(env), logger_(logger), total_files_size_(0), @@ -26,7 +27,7 @@ SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr logger, cur_compactions_reserved_size_(0), max_allowed_space_(0), delete_scheduler_(env, rate_bytes_per_sec, logger.get(), this, - max_trash_db_ratio) {} + max_trash_db_ratio, bytes_max_delete_chunk) {} SstFileManagerImpl::~SstFileManagerImpl() {} @@ -196,10 +197,11 @@ SstFileManager* NewSstFileManager(Env* env, std::shared_ptr info_log, std::string trash_dir, int64_t rate_bytes_per_sec, bool delete_existing_trash, Status* status, - double max_trash_db_ratio) { + double max_trash_db_ratio, + uint64_t bytes_max_delete_chunk) { SstFileManagerImpl* res = new SstFileManagerImpl(env, 
info_log, rate_bytes_per_sec, - max_trash_db_ratio); + max_trash_db_ratio, bytes_max_delete_chunk); // trash_dir is deprecated and not needed anymore, but if user passed it // we will still remove files in it. @@ -236,7 +238,8 @@ SstFileManager* NewSstFileManager(Env* env, std::shared_ptr info_log, std::string trash_dir, int64_t rate_bytes_per_sec, bool delete_existing_trash, Status* status, - double max_trash_db_ratio) { + double max_trash_db_ratio, + uint64_t bytes_max_delete_chunk) { if (status) { *status = Status::NotSupported("SstFileManager is not supported in ROCKSDB_LITE"); diff --git a/util/sst_file_manager_impl.h b/util/sst_file_manager_impl.h index db2ba08234a1689c47d8ca38c4828ade95f4d9d6..b7a557d9982ec60688adee7aa7e6e421ac2c713d 100644 --- a/util/sst_file_manager_impl.h +++ b/util/sst_file_manager_impl.h @@ -27,7 +27,8 @@ class SstFileManagerImpl : public SstFileManager { public: explicit SstFileManagerImpl(Env* env, std::shared_ptr logger, int64_t rate_bytes_per_sec, - double max_trash_db_ratio); + double max_trash_db_ratio, + uint64_t bytes_max_delete_chunk); ~SstFileManagerImpl();