提交 25ac1697 编写于 作者: S Sagar Vemuri 提交者: Facebook Github Bot

Blob DB: Evict oldest blob file when close to blob db size limit

Summary:
Evict oldest blob file and put it in obsolete_files list when close to blob db size limit. The file will be delete when the `DeleteObsoleteFiles` background job runs next time.
For now I set `kEvictOldestFileAtSize` constant, which controls when to evict the oldest file, at 90%. It could be tweaked or made into an option if really needed; I didn't want to expose it as an option pre-maturely as there are already too many :) .
Closes https://github.com/facebook/rocksdb/pull/3094

Differential Revision: D6187340

Pulled By: sagar0

fbshipit-source-id: 687f8262101b9301bf964b94025a2fe9d8573421
上级 3c208e76
......@@ -62,7 +62,7 @@ bool blobf_compare_ttl::operator()(const std::shared_ptr<BlobFile>& lhs,
if (lhs->expiration_range_.first > rhs->expiration_range_.first) {
return false;
}
return lhs->BlobFileNumber() > rhs->BlobFileNumber();
return lhs->BlobFileNumber() < rhs->BlobFileNumber();
}
void EvictAllVersionsCompactionListener::InternalListener::OnCompaction(
......@@ -117,7 +117,8 @@ BlobDBImpl::BlobDBImpl(const std::string& dbname,
total_periods_ampl_(0),
total_blob_space_(0),
open_p1_done_(false),
debug_level_(0) {
debug_level_(0),
oldest_file_evicted_(false) {
blob_dir_ = (bdb_options_.path_relative)
? dbname + "/" + bdb_options_.blob_dir
: bdb_options_.blob_dir;
......@@ -171,7 +172,8 @@ BlobDBImpl::BlobDBImpl(DB* db, const BlobDBOptions& blob_db_options)
last_period_ampl_(0),
total_periods_write_(0),
total_periods_ampl_(0),
total_blob_space_(0) {
total_blob_space_(0),
oldest_file_evicted_(false) {
if (!bdb_options_.blob_dir.empty())
blob_dir_ = (bdb_options_.path_relative)
? db_->GetName() + "/" + bdb_options_.blob_dir
......@@ -931,20 +933,56 @@ uint64_t BlobDBImpl::ExtractExpiration(const Slice& key, const Slice& value,
return has_expiration ? expiration : kNoExpiration;
}
std::shared_ptr<BlobFile> BlobDBImpl::GetOldestBlobFile() {
std::vector<std::shared_ptr<BlobFile>> blob_files;
CopyBlobFiles(&blob_files, [](const std::shared_ptr<BlobFile>& f) {
return !f->Obsolete() && f->Immutable();
});
blobf_compare_ttl compare;
return *std::min_element(blob_files.begin(), blob_files.end(), compare);
}
bool BlobDBImpl::EvictOldestBlobFile() {
auto oldest_file = GetOldestBlobFile();
if (oldest_file == nullptr) {
return false;
}
WriteLock wl(&mutex_);
oldest_file->SetCanBeDeleted();
obsolete_files_.push_front(oldest_file);
oldest_file_evicted_.store(true);
return true;
}
Status BlobDBImpl::CheckSize(size_t blob_size) {
uint64_t new_space_util = total_blob_space_.load() + blob_size;
if (bdb_options_.blob_dir_size > 0) {
if (!bdb_options_.is_fifo &&
(new_space_util > bdb_options_.blob_dir_size)) {
return Status::NoSpace(
"Write failed, as writing it would exceed blob_dir_size limit.");
}
if (bdb_options_.is_fifo && !oldest_file_evicted_.load() &&
(new_space_util >
kEvictOldestFileAtSize * bdb_options_.blob_dir_size)) {
EvictOldestBlobFile();
}
}
return Status::OK();
}
Status BlobDBImpl::AppendBlob(const std::shared_ptr<BlobFile>& bfile,
const std::string& headerbuf, const Slice& key,
const Slice& value, uint64_t expiration,
std::string* index_entry) {
auto size_put = BlobLogRecord::kHeaderSize + key.size() + value.size();
if (bdb_options_.blob_dir_size > 0 &&
(total_blob_space_.load() + size_put) > bdb_options_.blob_dir_size) {
if (!bdb_options_.is_fifo) {
return Status::NoSpace("Blob DB reached the maximum configured size.");
}
Status s = CheckSize(size_put);
if (!s.ok()) {
return s;
}
Status s;
uint64_t blob_offset = 0;
uint64_t key_offset = 0;
{
......@@ -1910,7 +1948,12 @@ std::pair<bool, int64_t> BlobDBImpl::DeleteObsoleteFiles(bool aborted) {
}
// directory change. Fsync
if (file_deleted) dir_ent_->Fsync();
if (file_deleted) {
dir_ent_->Fsync();
// reset oldest_file_evicted flag
oldest_file_evicted_.store(false);
}
// put files back into obsolete if for some reason, delete failed
if (!tobsolete.empty()) {
......@@ -1924,13 +1967,18 @@ std::pair<bool, int64_t> BlobDBImpl::DeleteObsoleteFiles(bool aborted) {
}
void BlobDBImpl::CopyBlobFiles(
std::vector<std::shared_ptr<BlobFile>>* bfiles_copy) {
std::vector<std::shared_ptr<BlobFile>>* bfiles_copy,
std::function<bool(const std::shared_ptr<BlobFile>&)> predicate) {
ReadLock rl(&mutex_);
// take a copy
bfiles_copy->reserve(blob_files_.size());
for (auto const& p : blob_files_) {
bfiles_copy->push_back(p.second);
bool pred_value = true;
if (predicate) {
pred_value = predicate(p.second);
}
if (pred_value) {
bfiles_copy->push_back(p.second);
}
}
}
......
......@@ -205,6 +205,10 @@ class BlobDBImpl : public BlobDB {
// how often to schedule check seq files period
static constexpr uint32_t kCheckSeqFilesPeriodMillisecs = 10 * 1000;
// when should oldest file be evicted:
// on reaching 90% of blob_dir_size
static constexpr double kEvictOldestFileAtSize = 0.9;
using BlobDB::Put;
Status Put(const WriteOptions& options, const Slice& key,
const Slice& value) override;
......@@ -414,7 +418,9 @@ class BlobDBImpl : public BlobDB {
bool FindFileAndEvictABlob(uint64_t file_number, uint64_t key_size,
uint64_t blob_offset, uint64_t blob_size);
void CopyBlobFiles(std::vector<std::shared_ptr<BlobFile>>* bfiles_copy);
void CopyBlobFiles(
std::vector<std::shared_ptr<BlobFile>>* bfiles_copy,
std::function<bool(const std::shared_ptr<BlobFile>&)> predicate = {});
void FilterSubsetOfFiles(
const std::vector<std::shared_ptr<BlobFile>>& blob_files,
......@@ -423,6 +429,12 @@ class BlobDBImpl : public BlobDB {
uint64_t EpochNow() { return env_->NowMicros() / 1000000; }
Status CheckSize(size_t blob_size);
std::shared_ptr<BlobFile> GetOldestBlobFile();
bool EvictOldestBlobFile();
// the base DB
DBImpl* db_impl_;
Env* env_;
......@@ -526,6 +538,8 @@ class BlobDBImpl : public BlobDB {
bool open_p1_done_;
uint32_t debug_level_;
std::atomic<bool> oldest_file_evicted_;
};
} // namespace blob_db
......
......@@ -700,12 +700,15 @@ TEST_F(BlobDBTest, GCExpiredKeyWhileOverwriting) {
VerifyDB({{"foo", "v2"}});
}
TEST_F(BlobDBTest, GCOldestSimpleBlobFileWhenOutOfSpace) {
// This test is no longer valid since we now return an error when we go
// over the configured blob_dir_size.
// The test needs to be re-written later in such a way that writes continue
// after a GC happens.
TEST_F(BlobDBTest, DISABLED_GCOldestSimpleBlobFileWhenOutOfSpace) {
// Use mock env to stop wall clock.
Options options;
options.env = mock_env_.get();
BlobDBOptions bdb_options;
bdb_options.is_fifo = true;
bdb_options.blob_dir_size = 100;
bdb_options.blob_file_size = 100;
bdb_options.min_blob_size = 0;
......@@ -927,7 +930,7 @@ TEST_F(BlobDBTest, MigrateFromPlainRocksDB) {
}
// Test to verify that a NoSpace IOError Status is returned on reaching
// blob_dir_size limit.
// blob_dir_size limit.
TEST_F(BlobDBTest, OutOfSpace) {
// Use mock env to stop wall clock.
Options options;
......@@ -949,6 +952,41 @@ TEST_F(BlobDBTest, OutOfSpace) {
ASSERT_TRUE(s.IsNoSpace());
}
TEST_F(BlobDBTest, EvictOldestFileWhenCloseToSpaceLimit) {
// Use mock env to stop wall clock.
Options options;
BlobDBOptions bdb_options;
bdb_options.blob_dir_size = 270;
bdb_options.blob_file_size = 100;
bdb_options.disable_background_tasks = true;
bdb_options.is_fifo = true;
Open(bdb_options);
// Each stored blob has an overhead of about 32 bytes currently.
// So a 100 byte blob should take up 132 bytes.
std::string value(100, 'v');
ASSERT_OK(blob_db_->PutWithTTL(WriteOptions(), "key1", value, 10));
auto *bdb_impl = static_cast<BlobDBImpl *>(blob_db_);
auto blob_files = bdb_impl->TEST_GetBlobFiles();
ASSERT_EQ(1, blob_files.size());
// Adding another 100 byte blob would take the total size to 264 bytes
// (2*132), which is more than 90% of blob_dir_size. So, the oldest file
// should be evicted and put in obsolete files list.
ASSERT_OK(blob_db_->PutWithTTL(WriteOptions(), "key2", value, 60));
auto obsolete_files = bdb_impl->TEST_GetObsoleteFiles();
ASSERT_EQ(1, obsolete_files.size());
ASSERT_TRUE(obsolete_files[0]->Immutable());
ASSERT_EQ(blob_files[0]->BlobFileNumber(),
obsolete_files[0]->BlobFileNumber());
bdb_impl->TEST_DeleteObsoleteFiles();
obsolete_files = bdb_impl->TEST_GetObsoleteFiles();
ASSERT_TRUE(obsolete_files.empty());
}
TEST_F(BlobDBTest, InlineSmallValues) {
constexpr uint64_t kMaxExpiration = 1000;
Random rnd(301);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册