diff --git a/db/db_impl.cc b/db/db_impl.cc index 9cff6991d47161406a289a39ec047bb07f3d193d..8be77d57e990fc9eaf26022066684ccaf3fc16be 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -2093,13 +2093,11 @@ Status DBImpl::Get(const ReadOptions& options, return GetImpl(options, key, value); } -// If no_io is true, then returns Status::NotFound if key is not in memtable, -// immutable-memtable and bloom-filters can guarantee that key is not in db, -// "value" is garbage string if no_io is true Status DBImpl::GetImpl(const ReadOptions& options, const Slice& key, std::string* value, - const bool no_io) { + const bool no_io, + bool* value_found) { Status s; StopWatch sw(env_, options_.statistics, DB_GET); @@ -2128,12 +2126,12 @@ Status DBImpl::GetImpl(const ReadOptions& options, // s is both in/out. When in, s could either be OK or MergeInProgress. // value will contain the current merge operand in the latter case. LookupKey lkey(key, snapshot); - if (mem->Get(lkey, value, &s, options_, no_io)) { + if (mem->Get(lkey, value, &s, options_)) { // Done - } else if (imm.Get(lkey, value, &s, options_, no_io)) { + } else if (imm.Get(lkey, value, &s, options_)) { // Done } else { - current->Get(options, lkey, value, &s, &stats, options_, no_io); + current->Get(options, lkey, value, &s, &stats, options_, no_io,value_found); have_stat_update = true; } mutex_.Lock(); @@ -2223,19 +2221,14 @@ std::vector DBImpl::MultiGet(const ReadOptions& options, return statList; } -bool DBImpl::KeyMayExist(const Slice& key) { - return KeyMayExistImpl(key, versions_->LastSequence()); -} - -bool DBImpl::KeyMayExistImpl(const Slice& key, - const SequenceNumber read_from_seq) { - std::string value; - SnapshotImpl read_from_snapshot; - read_from_snapshot.number_ = read_from_seq; - ReadOptions ropts; - ropts.snapshot = &read_from_snapshot; - const Status s = GetImpl(ropts, key, &value, true); - return !s.IsNotFound(); +bool DBImpl::KeyMayExist(const ReadOptions& options, + const Slice& key, + std::string* value, + bool* value_found) { + if (value_found != nullptr) { + *value_found = true; // falsify later if key-may-exist but can't fetch value + } + return GetImpl(options, key, value, true, value_found).ok(); } Iterator* DBImpl::NewIterator(const ReadOptions& options) { diff --git a/db/db_impl.h b/db/db_impl.h index dedfd9d7ed6a645cef73fed71761f9c97e24faf3..333a868676175b34a53a26587e2982755ff15048 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -50,9 +50,14 @@ class DBImpl : public DB { const std::vector& keys, std::vector* values); - // Returns false if key can't exist- based on memtable, immutable-memtable and - // bloom-filters; true otherwise. No IO is performed - virtual bool KeyMayExist(const Slice& key); + // Returns false if key doesn't exist in the database and true if it may. + // If value_found is not passed in as null, then return the value if found in + // memory. On return, if value was found, then value_found will be set to true + // , otherwise false. + virtual bool KeyMayExist(const ReadOptions& options, + const Slice& key, + std::string* value, + bool* value_found = nullptr); virtual Iterator* NewIterator(const ReadOptions&); virtual const Snapshot* GetSnapshot(); virtual void ReleaseSnapshot(const Snapshot* snapshot); @@ -104,15 +109,6 @@ class DBImpl : public DB { // Trigger's a background call for testing. void TEST_PurgeObsoleteteWAL(); - // KeyMayExist's internal function, but can be called internally from rocksdb - // to check memtable from sequence_number=read_from_seq. This is useful to - // check presence of key in db when key's existence is to be also checked in - // an incompletely written WriteBatch in memtable. eg. Database doesn't have - // key A and WriteBatch=[PutA,B; DelA]. A KeyMayExist called from DelA also - // needs to check itself for any PutA to be sure to not drop the delete. - bool KeyMayExistImpl(const Slice& key, - const SequenceNumber read_from_seq); - protected: Env* const env_; const std::string dbname_; @@ -415,11 +411,13 @@ class DBImpl : public DB { std::vector& snapshots, SequenceNumber* prev_snapshot); - // Function that Get and KeyMayExistImpl call with no_io true or false + // Function that Get and KeyMayExist call with no_io true or false + // Note: 'value_found' from KeyMayExist propagates here Status GetImpl(const ReadOptions& options, const Slice& key, std::string* value, - const bool no_io = false); + const bool no_io = false, + bool* value_found = nullptr); }; // Sanitize db options. The caller should delete result.info_log if diff --git a/db/db_test.cc b/db/db_test.cc index d25fe8b3ef27f9b27948b1869e21c4570439fa6f..9a7bb2eb0e8aec7c432dbefe4647b7fbcdd01775 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -772,34 +772,41 @@ TEST(DBTest, GetEncountersEmptyLevel) { } while (ChangeOptions()); } -// KeyMayExist-API returns false if memtable(s) and in-memory bloom-filters can -// guarantee that the key doesn't exist in the db, else true. This can lead to -// a few false positives, but not false negatives. To make test deterministic, -// use a much larger number of bits per key-20 than bits in the key, so -// that false positives are eliminated +// KeyMayExist can lead to a few false positives, but not false negatives. +// To make test deterministic, use a much larger number of bits per key-20 than +// bits in the key, so that false positives are eliminated TEST(DBTest, KeyMayExist) { do { + ReadOptions ropts; + std::string value; Options options = CurrentOptions(); options.filter_policy = NewBloomFilterPolicy(20); Reopen(&options); - ASSERT_TRUE(!db_->KeyMayExist("a")); + ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value)); ASSERT_OK(db_->Put(WriteOptions(), "a", "b")); - ASSERT_TRUE(db_->KeyMayExist("a")); + bool value_found = false; + ASSERT_TRUE(db_->KeyMayExist(ropts, "a", &value, &value_found)); + ASSERT_TRUE(value_found); + ASSERT_EQ("b", value); dbfull()->Flush(FlushOptions()); - ASSERT_TRUE(db_->KeyMayExist("a")); + value.clear(); + value_found = false; + ASSERT_TRUE(db_->KeyMayExist(ropts, "a", &value, &value_found)); + ASSERT_TRUE(value_found); + ASSERT_EQ("b", value); ASSERT_OK(db_->Delete(WriteOptions(), "a")); - ASSERT_TRUE(!db_->KeyMayExist("a")); + ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value)); dbfull()->Flush(FlushOptions()); dbfull()->CompactRange(nullptr, nullptr); - ASSERT_TRUE(!db_->KeyMayExist("a")); + ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value)); ASSERT_OK(db_->Delete(WriteOptions(), "c")); - ASSERT_TRUE(!db_->KeyMayExist("c")); + ASSERT_TRUE(!db_->KeyMayExist(ropts, "c", &value)); delete options.filter_policy; } while (ChangeOptions()); @@ -3045,7 +3052,13 @@ class ModelDB: public DB { Status::NotSupported("Not implemented.")); return s; } - virtual bool KeyMayExist(const Slice& key) { + virtual bool KeyMayExist(const ReadOptions& options, + const Slice& key, + std::string* value, + bool* value_found = nullptr) { + if (value_found != nullptr) { + *value_found = false; + } return true; // Not Supported directly } virtual Iterator* NewIterator(const ReadOptions& options) { diff --git a/db/memtable.cc b/db/memtable.cc index fb3f4f1f7b9ab05f4f72a6020885aa80762f184b..6220588046ec86bbe951d9b64873d337446b6dac 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -130,7 +130,7 @@ void MemTable::Add(SequenceNumber s, ValueType type, } bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, - const Options& options, const bool check_presence_only) { + const Options& options) { Slice memkey = key.memtable_key(); std::shared_ptr iter(table_.get()->GetIterator()); iter->Seek(memkey.data()); @@ -174,10 +174,6 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, return true; } case kTypeMerge: { - if (check_presence_only) { - *s = Status::OK(); - return true; - } Slice v = GetLengthPrefixedSlice(key_ptr + key_length); if (merge_in_progress) { merge_operator->Merge(key.user_key(), &v, operand, diff --git a/db/memtable.h b/db/memtable.h index 6a3c7fcfd6601c1023d8d8ecbdebf49a76472ac5..73d32fc4c5a880112442ec63c5ef46915874b17b 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -73,13 +73,12 @@ class MemTable { // If memtable contains a deletion for key, store a NotFound() error // in *status and return true. // If memtable contains Merge operation as the most recent entry for a key, - // and if check_presence_only is set, return true with Status::OK, - // else if the merge process does not stop (not reaching a value or delete), + // and the merge process does not stop (not reaching a value or delete), // store the current merged result in value and MergeInProgress in s. // return false // Else, return false. bool Get(const LookupKey& key, std::string* value, Status* s, - const Options& options, const bool check_presence_only = false); + const Options& options); // Returns the edits area that is needed for flushing the memtable VersionEdit* GetEdits() { return &edit_; } diff --git a/db/memtablelist.cc b/db/memtablelist.cc index ac89d10433eee90a85c1c25e179267f020cc86bd..9d8b675bf828391623363b0d5534883e631b2a85 100644 --- a/db/memtablelist.cc +++ b/db/memtablelist.cc @@ -194,10 +194,10 @@ size_t MemTableList::ApproximateMemoryUsage() { // Search all the memtables starting from the most recent one. // Return the most recent value found, if any. bool MemTableList::Get(const LookupKey& key, std::string* value, Status* s, - const Options& options, const bool check_presence_only) { + const Options& options) { for (list::iterator it = memlist_.begin(); it != memlist_.end(); ++it) { - if ((*it)->Get(key, value, s, options, check_presence_only)) { + if ((*it)->Get(key, value, s, options)) { return true; } } diff --git a/db/memtablelist.h b/db/memtablelist.h index d741a663007181bc0a5099b17fab4e87f284b06f..b30089cf696276e6c86fd8fcbb1d5cbdb6aa6e2f 100644 --- a/db/memtablelist.h +++ b/db/memtablelist.h @@ -70,7 +70,7 @@ class MemTableList { // Search all the memtables starting from the most recent one. // Return the most recent value found, if any. bool Get(const LookupKey& key, std::string* value, Status* s, - const Options& options, const bool check_presence_only = false); + const Options& options); // Returns the list of underlying memtables. void GetMemTables(std::vector* list); diff --git a/db/version_set.cc b/db/version_set.cc index 15ff1330f6d1f687de297052093f951454b3f94a..48588c712cbfed9f14c93aa5008234488e5d8cf4 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -238,6 +238,7 @@ struct Saver { SaverState state; const Comparator* ucmp; Slice user_key; + bool* value_found; // Is value set correctly? Used by KeyMayExist std::string* value; const MergeOperator* merge_operator; Logger* logger; @@ -245,13 +246,17 @@ struct Saver { }; } -// Called from TableCache::Get when bloom-filters can't guarantee that key does -// not exist and Get is not permitted to do IO to read the data-block and be -// certain. -// Set the key as Found and let the caller know that key-may-exist +// Called from TableCache::Get and InternalGet when file/block in which key may +// exist are not there in TableCache/BlockCache respectively. In this case we +// can't guarantee that key does not exist and are not permitted to do IO to be +// certain.Set the status=kFound and value_found=false to let the caller know +// that key may exist but is not there in memory static void MarkKeyMayExist(void* arg) { Saver* s = reinterpret_cast(arg); s->state = kFound; + if (s->value_found != nullptr) { + *(s->value_found) = false; + } } static bool SaveValue(void* arg, const Slice& ikey, const Slice& v, bool didIO){ @@ -339,7 +344,8 @@ void Version::Get(const ReadOptions& options, Status *status, GetStats* stats, const Options& db_options, - const bool no_io) { + const bool no_io, + bool* value_found) { Slice ikey = k.internal_key(); Slice user_key = k.user_key(); const Comparator* ucmp = vset_->icmp_.user_comparator(); @@ -355,6 +361,7 @@ void Version::Get(const ReadOptions& options, saver.state = status->ok()? kNotFound : kMerge; saver.ucmp = ucmp; saver.user_key = user_key; + saver.value_found = value_found; saver.value = value; saver.merge_operator = merge_operator; saver.logger = logger.get(); diff --git a/db/version_set.h b/db/version_set.h index 11bfb9961427e6279634252f3bfc4404f2f1544a..320ff8e689bc4e21748bc548de895d218ae1a3bf 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -75,7 +75,7 @@ class Version { }; void Get(const ReadOptions&, const LookupKey& key, std::string* val, Status* status, GetStats* stats, const Options& db_option, - const bool no_io = false); + const bool no_io = false, bool* value_found = nullptr); // Adds "stats" into the current state. Returns true if a new // compaction may need to be triggered, false otherwise. diff --git a/db/write_batch.cc b/db/write_batch.cc index 9845f49ad9bbf1ff3cab327dd54638f0054ac1b2..a4213cd97459900b9e28904c05a79b5c25194610 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -16,10 +16,12 @@ #include "leveldb/write_batch.h" +#include "leveldb/options.h" #include "leveldb/statistics.h" #include "db/dbformat.h" #include "db/db_impl.h" #include "db/memtable.h" +#include "db/snapshot.h" #include "db/write_batch_internal.h" #include "util/coding.h" #include @@ -167,9 +169,16 @@ class MemTableInserter : public WriteBatch::Handler { sequence_++; } virtual void Delete(const Slice& key) { - if (filter_deletes_ && !db_->KeyMayExistImpl(key, sequence_)) { - RecordTick(options_->statistics, NUMBER_FILTERED_DELETES); - return; + if (filter_deletes_) { + SnapshotImpl read_from_snapshot; + read_from_snapshot.number_ = sequence_; + ReadOptions ropts; + ropts.snapshot = &read_from_snapshot; + std::string value; + if (!db_->KeyMayExist(ropts, key, &value)) { + RecordTick(options_->statistics, NUMBER_FILTERED_DELETES); + return; + } } mem_->Add(sequence_, kTypeDeletion, key, Slice()); sequence_++; diff --git a/include/leveldb/db.h b/include/leveldb/db.h index e331e7f67f3989418ee1a852231d02afe0267799..0c056c362f6d23b68ac376a6a7d83e1d5583ddc0 100644 --- a/include/leveldb/db.h +++ b/include/leveldb/db.h @@ -104,7 +104,8 @@ class DB { // // May return some other Status on an error. virtual Status Get(const ReadOptions& options, - const Slice& key, std::string* value) = 0; + const Slice& key, + std::string* value) = 0; // If keys[i] does not exist in the database, then the i'th returned // status will be one for which Status::IsNotFound() is true, and @@ -121,11 +122,19 @@ class DB { std::vector* values) = 0; // If the key definitely does not exist in the database, then this method - // returns false. Otherwise return true. This check is potentially - // lighter-weight than invoking DB::Get(). One way to make this lighter weight - // is to avoid doing any IOs - // Default implementation here returns true - virtual bool KeyMayExist(const Slice& key) { + // returns false, else true. If the caller wants to obtain value when the key + // is found in memory, a bool for 'value_found' must be passed. 'value_found' + // will be true on return if value has been set properly. + // This check is potentially lighter-weight than invoking DB::Get(). One way + // to make this lighter weight is to avoid doing any IOs. + // Default implementation here returns true and sets 'value_found' to false + virtual bool KeyMayExist(const ReadOptions& options, + const Slice& key, + std::string* value, + bool* value_found = nullptr) { + if (value_found != nullptr) { + *value_found = false; + } return true; } diff --git a/include/leveldb/options.h b/include/leveldb/options.h index c591ad515c64bc410a7ec7838a661ae1b289ddec..1bf0f6dda51bb2d286282ed090abb671b23cab06 100644 --- a/include/leveldb/options.h +++ b/include/leveldb/options.h @@ -473,12 +473,10 @@ struct Options { // Default: 0 uint64_t bytes_per_sync; - // Use bloom-filter for deletes when this is true. - // db->Delete first calls KeyMayExist which checks memtable,immutable-memtable - // and bloom-filters to determine if the key does not exist in the database. - // If the key definitely does not exist, then the delete is a noop.KeyMayExist - // only incurs in-memory look up. This optimization avoids writing the delete - // to storage when appropriate. + // Use KeyMayExist API to filter deletes when this is true. + // If KeyMayExist returns false, i.e. the key definitely does not exist, then + // the delete is a noop. KeyMayExist only incurs in-memory look up. + // This optimization avoids writing the delete to storage when appropriate. // Default: false bool filter_deletes; diff --git a/table/table.cc b/table/table.cc index c51a0aa44e6955ea34fc00996673ef8472ddc4fe..80c5ef491b1a4c139c8e4c11abfbbefd70b476d1 100644 --- a/table/table.cc +++ b/table/table.cc @@ -235,7 +235,8 @@ Iterator* Table::BlockReader(void* arg, const ReadOptions& options, const Slice& index_value, bool* didIO, - bool for_compaction) { + bool for_compaction, + const bool no_io) { Table* table = reinterpret_cast(arg); Cache* block_cache = table->rep_->options.block_cache.get(); std::shared_ptr statistics = table->rep_->options.statistics; @@ -264,6 +265,8 @@ Iterator* Table::BlockReader(void* arg, block = reinterpret_cast(block_cache->Value(cache_handle)); RecordTick(statistics, BLOCK_CACHE_HIT); + } else if (no_io) { + return nullptr; // Did not find in block_cache and can't do IO } else { Histograms histogram = for_compaction ? READ_BLOCK_COMPACTION_MICROS : READ_BLOCK_GET_MICROS; @@ -286,7 +289,9 @@ Iterator* Table::BlockReader(void* arg, RecordTick(statistics, BLOCK_CACHE_MISS); } - } else { + } else if (no_io) { + return nullptr; // Could not read from block_cache and can't do IO + }else { s = ReadBlock(table->rep_->file.get(), options, handle, &block, didIO); } } @@ -340,16 +345,17 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k, // cross one data block, we should be fine. RecordTick(rep_->options.statistics, BLOOM_FILTER_USEFUL); break; - } else if (no_io) { - // Update Saver.state to Found because we are only looking for whether - // bloom-filter can guarantee the key is not there when "no_io" - (*mark_key_may_exist)(arg); - done = true; } else { bool didIO = false; Iterator* block_iter = BlockReader(this, options, iiter->value(), - &didIO); + &didIO, no_io); + if (no_io && !block_iter) { // couldn't get block from block_cache + // Update Saver.state to Found because we are only looking for whether + // we can guarantee the key is not there when "no_io" is set + (*mark_key_may_exist)(arg); + break; + } for (block_iter->Seek(k); block_iter->Valid(); block_iter->Next()) { if (!(*saver)(arg, block_iter->key(), block_iter->value(), didIO)) { done = true; diff --git a/table/table.h b/table/table.h index 0be95f368afd2e087d3bb9e4fba1f1b4eac8394f..b39a5c186f13f0e14fd0909003ab2de61fe1a9ca 100644 --- a/table/table.h +++ b/table/table.h @@ -77,7 +77,8 @@ class Table { const EnvOptions& soptions, const Slice&, bool for_compaction); static Iterator* BlockReader(void*, const ReadOptions&, const Slice&, - bool* didIO, bool for_compaction = false); + bool* didIO, bool for_compaction = false, + const bool no_io = false); // Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found // after a call to Seek(key), until handle_result returns false. diff --git a/tools/db_stress.cc b/tools/db_stress.cc index fcd5dc269d9328062328df0f3d283420cf05dd96..ca660237328905bafbb0e80daf665d54df35766a 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -180,7 +180,7 @@ static uint32_t FLAGS_log2_keys_per_lock = 2; // implies 2^2 keys per lock // Percentage of times we want to purge redundant keys in memory before flushing static uint32_t FLAGS_purge_redundant_percent = 50; -// On true, deletes use bloom-filter and drop the delete if key not present +// On true, deletes use KeyMayExist to drop the delete if key not present static bool FLAGS_filter_deletes = false; // Level0 compaction start trigger diff --git a/utilities/ttl/db_ttl.cc b/utilities/ttl/db_ttl.cc index 7dfda720789df5054961c3447b8e8ea59e79dac5..90cc1ec4cad2f503eee9343f63f5588b4aa80c02 100644 --- a/utilities/ttl/db_ttl.cc +++ b/utilities/ttl/db_ttl.cc @@ -158,8 +158,11 @@ std::vector DBWithTTL::MultiGet(const ReadOptions& options, supported with TTL")); } -bool DBWithTTL::KeyMayExist(const Slice& key) { - return db_->KeyMayExist(key); +bool DBWithTTL::KeyMayExist(ReadOptions& options, + const Slice& key, + std::string* value, + bool* value_found) { + return db_->KeyMayExist(options, key, value, value_found); } Status DBWithTTL::Delete(const WriteOptions& wopts, const Slice& key) { diff --git a/utilities/ttl/db_ttl.h b/utilities/ttl/db_ttl.h index d66e396caec1c0d809175ddb50af17668595ded4..078a069ba20284fd04774f21162f8f08b4d736e5 100644 --- a/utilities/ttl/db_ttl.h +++ b/utilities/ttl/db_ttl.h @@ -33,7 +33,10 @@ class DBWithTTL : public DB, CompactionFilter { const std::vector& keys, std::vector* values); - virtual bool KeyMayExist(const Slice& key); + virtual bool KeyMayExist(ReadOptions& options, + const Slice& key, + std::string* value, + bool* value_found = nullptr); virtual Status Delete(const WriteOptions& wopts, const Slice& key);