From 0712d541d103cc0b663f543505ed19ef42a8d486 Mon Sep 17 00:00:00 2001 From: Maysam Yabandeh Date: Thu, 29 Dec 2016 15:48:24 -0800 Subject: [PATCH] Delegate Cleanables Summary: Cleanable objects will perform the registered cleanups when they are destructed. We however rather to delay this cleaning like when we are gathering the merge operands. Current approach is to create the Cleanable object on heap (instead of on stack) and delay deleting it. By allowing Cleanables to delegate their cleanups to another cleanable object we can delay the cleaning without however the need to craete the cleanable object on heap and keeping it around. This patch applies this technique for the cleanups of BlockIter and shows improved performance for some in-memory benchmarks: +1.8% for merge worklaod, +6.4% for non-merge workload when the merge operator is specified. https://our.intern.facebook.com/intern/tasks?t=15168163 Non-merge benchmark: TEST_TMPDIR=/dev/shm/v100nocomp/ ./db_bench --benchmarks=fillrandom --num=1000000 -value_size=100 -compression_type=none Reading random with no merge operator specified: TEST_TMPDIR=/dev/shm/v100nocomp/ ./db_bench --benchmarks="read Closes https://github.com/facebook/rocksdb/pull/1711 Differential Revision: D4361163 Pulled By: maysamyabandeh fbshipit-source-id: 9801e07 --- Makefile | 4 + db/pinned_iterators_manager.h | 4 +- include/rocksdb/iterator.h | 10 ++ table/block_based_table_reader.cc | 45 +++----- table/cleanable_test.cc | 183 ++++++++++++++++++++++++++++++ table/iterator.cc | 58 +++++++++- 6 files changed, 269 insertions(+), 35 deletions(-) create mode 100644 table/cleanable_test.cc diff --git a/Makefile b/Makefile index d768647b9..11220ffa5 100644 --- a/Makefile +++ b/Makefile @@ -324,6 +324,7 @@ TESTS = \ db_properties_test \ db_table_properties_test \ autovector_test \ + cleanable_test \ column_family_test \ table_properties_collector_test \ arena_test \ @@ -1147,6 +1148,9 @@ full_filter_block_test: table/full_filter_block_test.o $(LIBOBJECTS) $(TESTHARNE log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) +cleanable_test: table/cleanable_test.o $(LIBOBJECTS) $(TESTHARNESS) + $(AM_LINK) + table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) diff --git a/db/pinned_iterators_manager.h b/db/pinned_iterators_manager.h index 9f814f0d9..77881a2ef 100644 --- a/db/pinned_iterators_manager.h +++ b/db/pinned_iterators_manager.h @@ -16,7 +16,7 @@ namespace rocksdb { // PinnedIteratorsManager will be notified whenever we need to pin an Iterator // and it will be responsible for deleting pinned Iterators when they are // not needed anymore. -class PinnedIteratorsManager { +class PinnedIteratorsManager : public Cleanable { public: PinnedIteratorsManager() : pinning_enabled(false) {} ~PinnedIteratorsManager() { @@ -67,6 +67,8 @@ class PinnedIteratorsManager { release_func(ptr); } pinned_ptrs_.clear(); + // Also do cleanups from the base Cleanable + Cleanable::Reset(); } private: diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index afc98d22b..9d17989a7 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -36,6 +36,9 @@ class Cleanable { // not abstract and therefore clients should not override it. typedef void (*CleanupFunction)(void* arg1, void* arg2); void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2); + void DelegateCleanupsTo(Cleanable* other); + // DoCleanup and also resets the pointers for reuse + void Reset(); protected: struct Cleanup { @@ -45,6 +48,13 @@ class Cleanable { Cleanup* next; }; Cleanup cleanup_; + // It also becomes the owner of c + void RegisterCleanup(Cleanup* c); + + private: + // Performs all the cleanups. It does not reset the pointers. Making it + // private to prevent misuse + inline void DoCleanup(); }; class Iterator : public Cleanable { diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 6617d2ec2..f85d570b8 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -1539,7 +1539,6 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, PinnedIteratorsManager* pinned_iters_mgr = get_context->pinned_iters_mgr(); bool pin_blocks = pinned_iters_mgr && pinned_iters_mgr->PinningEnabled(); - BlockIter* biter = nullptr; bool done = false; for (iiter.Seek(key); iiter.Valid() && !done; iiter.Next()) { @@ -1558,60 +1557,42 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL); break; } else { - BlockIter stack_biter; - if (pin_blocks) { - // We need to create the BlockIter on heap because we may need to - // pin it if we encounterd merge operands - biter = static_cast( - NewDataBlockIterator(rep_, read_options, iiter.value())); - } else { - biter = &stack_biter; - NewDataBlockIterator(rep_, read_options, iiter.value(), biter); - } + BlockIter biter; + NewDataBlockIterator(rep_, read_options, iiter.value(), &biter); if (read_options.read_tier == kBlockCacheTier && - biter->status().IsIncomplete()) { + biter.status().IsIncomplete()) { // couldn't get block from block_cache // Update Saver.state to Found because we are only looking for whether // we can guarantee the key is not there when "no_io" is set get_context->MarkKeyMayExist(); break; } - if (!biter->status().ok()) { - s = biter->status(); + if (!biter.status().ok()) { + s = biter.status(); break; } // Call the *saver function on each entry/block until it returns false - for (biter->Seek(key); biter->Valid(); biter->Next()) { + for (biter.Seek(key); biter.Valid(); biter.Next()) { ParsedInternalKey parsed_key; - if (!ParseInternalKey(biter->key(), &parsed_key)) { + if (!ParseInternalKey(biter.key(), &parsed_key)) { s = Status::Corruption(Slice()); } - if (!get_context->SaveValue(parsed_key, biter->value(), pin_blocks)) { + if (!get_context->SaveValue(parsed_key, biter.value(), pin_blocks)) { done = true; break; } } - s = biter->status(); - - if (pin_blocks) { - if (get_context->State() == GetContext::kMerge) { - // Pin blocks as long as we are merging - pinned_iters_mgr->PinIterator(biter); - } else { - delete biter; - } - biter = nullptr; - } else { - // biter is on stack, Nothing to clean + s = biter.status(); + + if (pin_blocks && get_context->State() == GetContext::kMerge) { + // Pin blocks as long as we are merging + biter.DelegateCleanupsTo(pinned_iters_mgr); } } } - if (pin_blocks && biter != nullptr) { - delete biter; - } if (s.ok()) { s = iiter.status(); } diff --git a/table/cleanable_test.cc b/table/cleanable_test.cc new file mode 100644 index 000000000..717e20ea6 --- /dev/null +++ b/table/cleanable_test.cc @@ -0,0 +1,183 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include + +#include "port/port.h" +#include "port/stack_trace.h" +#include "rocksdb/iostats_context.h" +#include "rocksdb/perf_context.h" +#include "util/testharness.h" +#include "util/testutil.h" + +namespace rocksdb { + +class CleanableTest : public testing::Test {}; + +// Use this to keep track of the cleanups that were actually performed +void Multiplier(void* arg1, void* arg2) { + int* res = reinterpret_cast(arg1); + int* num = reinterpret_cast(arg2); + *res *= *num; +} + +// the first Cleanup is on stack and the rest on heap, so test with both cases +TEST_F(CleanableTest, Register) { + int n2 = 2, n3 = 3; + int res = 1; + { Cleanable c1; } + // ~Cleanable + ASSERT_EQ(1, res); + + res = 1; + { + Cleanable c1; + c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; + } + // ~Cleanable + ASSERT_EQ(2, res); + + res = 1; + { + Cleanable c1; + c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; + c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; + } + // ~Cleanable + ASSERT_EQ(6, res); +} + +// the first Cleanup is on stack and the rest on heap, +// so test all the combinations of them +TEST_F(CleanableTest, Delegation) { + int n2 = 2, n3 = 3, n5 = 5, n7 = 7; + int res = 1; + { + Cleanable c2; + { + Cleanable c1; + c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; + c1.DelegateCleanupsTo(&c2); + } + // ~Cleanable + ASSERT_EQ(1, res); + } + // ~Cleanable + ASSERT_EQ(2, res); + + res = 1; + { + Cleanable c2; + { + Cleanable c1; + c1.DelegateCleanupsTo(&c2); + } + // ~Cleanable + ASSERT_EQ(1, res); + } + // ~Cleanable + ASSERT_EQ(1, res); + + res = 1; + { + Cleanable c2; + { + Cleanable c1; + c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; + c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; + c1.DelegateCleanupsTo(&c2); + } + // ~Cleanable + ASSERT_EQ(1, res); + } + // ~Cleanable + ASSERT_EQ(6, res); + + res = 1; + { + Cleanable c2; + c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; + { + Cleanable c1; + c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; + c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; + c1.DelegateCleanupsTo(&c2); // res = 2 * 3 * 5; + } + // ~Cleanable + ASSERT_EQ(1, res); + } + // ~Cleanable + ASSERT_EQ(30, res); + + res = 1; + { + Cleanable c2; + c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; + c2.RegisterCleanup(Multiplier, &res, &n7); // res = 5 * 7; + { + Cleanable c1; + c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; + c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; + c1.DelegateCleanupsTo(&c2); // res = 2 * 3 * 5 * 7; + } + // ~Cleanable + ASSERT_EQ(1, res); + } + // ~Cleanable + ASSERT_EQ(210, res); + + res = 1; + { + Cleanable c2; + c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; + c2.RegisterCleanup(Multiplier, &res, &n7); // res = 5 * 7; + { + Cleanable c1; + c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; + c1.DelegateCleanupsTo(&c2); // res = 2 * 5 * 7; + } + // ~Cleanable + ASSERT_EQ(1, res); + } + // ~Cleanable + ASSERT_EQ(70, res); + + res = 1; + { + Cleanable c2; + c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; + c2.RegisterCleanup(Multiplier, &res, &n7); // res = 5 * 7; + { + Cleanable c1; + c1.DelegateCleanupsTo(&c2); // res = 5 * 7; + } + // ~Cleanable + ASSERT_EQ(1, res); + } + // ~Cleanable + ASSERT_EQ(35, res); + + res = 1; + { + Cleanable c2; + c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; + { + Cleanable c1; + c1.DelegateCleanupsTo(&c2); // res = 5; + } + // ~Cleanable + ASSERT_EQ(1, res); + } + // ~Cleanable + ASSERT_EQ(5, res); +} + +} // namespace rocksdb + +int main(int argc, char** argv) { + rocksdb::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/table/iterator.cc b/table/iterator.cc index 93b07bfef..a90c720d6 100644 --- a/table/iterator.cc +++ b/table/iterator.cc @@ -19,10 +19,18 @@ Cleanable::Cleanable() { cleanup_.next = nullptr; } -Cleanable::~Cleanable() { +Cleanable::~Cleanable() { DoCleanup(); } + +void Cleanable::Reset() { + DoCleanup(); + cleanup_.function = nullptr; + cleanup_.next = nullptr; +} + +void Cleanable::DoCleanup() { if (cleanup_.function != nullptr) { (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2); - for (Cleanup* c = cleanup_.next; c != nullptr; ) { + for (Cleanup* c = cleanup_.next; c != nullptr;) { (*c->function)(c->arg1, c->arg2); Cleanup* next = c->next; delete c; @@ -31,6 +39,52 @@ Cleanable::~Cleanable() { } } +// If the entire linked list was on heap we could have simply add attach one +// link list to another. However the head is an embeded object to avoid the cost +// of creating objects for most of the use cases when the Cleanable has only one +// Cleanup to do. We could put evernything on heap if benchmarks show no +// negative impact on performance. +// Also we need to iterate on the linked list since there is no pointer to the +// tail. We can add the tail pointer but maintainin it might negatively impact +// the perforamnce for the common case of one cleanup where tail pointer is not +// needed. Again benchmarks could clarify that. +// Even without a tail pointer we could iterate on the list, find the tail, and +// have only that node updated without the need to insert the Cleanups one by +// one. This however would be redundant when the source Cleanable has one or a +// few Cleanups which is the case most of the time. +// TODO(myabandeh): if the list is too long we should maintain a tail pointer +// and have the entire list (minus the head that has to be inserted separately) +// merged with the target linked list at once. +void Cleanable::DelegateCleanupsTo(Cleanable* other) { + assert(other != nullptr); + if (cleanup_.function == nullptr) { + return; + } + Cleanup* c = &cleanup_; + other->RegisterCleanup(c->function, c->arg1, c->arg2); + c = c->next; + while (c != nullptr) { + Cleanup* next = c->next; + other->RegisterCleanup(c); + c = next; + } + cleanup_.function = nullptr; + cleanup_.next = nullptr; +} + +void Cleanable::RegisterCleanup(Cleanable::Cleanup* c) { + assert(c != nullptr); + if (cleanup_.function == nullptr) { + cleanup_.function = c->function; + cleanup_.arg1 = c->arg1; + cleanup_.arg2 = c->arg2; + delete c; + } else { + c->next = cleanup_.next; + cleanup_.next = c; + } +} + void Cleanable::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) { assert(func != nullptr); Cleanup* c; -- GitLab