diff --git a/db/db_bench.cc b/db/db_bench.cc
index 63cc906e799d228a550b76b1027d9b9deaa0275d..3ab130093c27b294dcab1ee88a8a597c0ec2a78e 100644
--- a/db/db_bench.cc
+++ b/db/db_bench.cc
@@ -22,6 +22,7 @@
 #include "rocksdb/memtablerep.h"
 #include "rocksdb/write_batch.h"
 #include "rocksdb/statistics.h"
+#include "rocksdb/perf_context.h"
 #include "port/port.h"
 #include "util/bit_set.h"
 #include "util/crc32c.h"
@@ -350,6 +351,8 @@ DEFINE_int64(stats_interval, 0, "Stats are reported every N operations when "
 DEFINE_int32(stats_per_interval, 0, "Reports additional stats per interval when"
              " this is greater than 0.");

+DEFINE_int32(perf_level, 0, "Level of perf collection");
+
 static bool ValidateRateLimit(const char* flagname, double value) {
   static constexpr double EPSILON = 1e-10;
   if ( value < -EPSILON ) {
@@ -689,6 +692,7 @@ struct SharedState {
   port::Mutex mu;
   port::CondVar cv;
   int total;
+  int perf_level;

   // Each thread goes through the following states:
   //    (1) initializing
@@ -700,7 +704,7 @@ struct SharedState {
   long num_done;
   bool start;

-  SharedState() : cv(&mu) { }
+  SharedState() : cv(&mu), perf_level(FLAGS_perf_level) { }
 };

 // Per-thread state for concurrent executions of the same benchmark.
@@ -810,6 +814,7 @@ class Benchmark {
         fprintf(stdout, "Memtablerep: vector\n");
         break;
     }
+    fprintf(stdout, "Perf Level: %d\n", FLAGS_perf_level);

     PrintWarnings();
     fprintf(stdout, "------------------------------------------------\n");
@@ -1150,6 +1155,7 @@ class Benchmark {
       }
     }

+    SetPerfLevel(static_cast<PerfLevel>(shared->perf_level));
     thread->stats.Start(thread->tid);
     (arg->bm->*(arg->method))(thread);
     thread->stats.Stop();
diff --git a/db/db_impl.cc b/db/db_impl.cc
index 5a2f0de4a60866e3b03f026c21e7e3fc0f9f4da2..a4e28b032a473a8ca21a264353e9bc3591266123 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -1035,7 +1035,7 @@ Status DBImpl::WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
             (unsigned long)m->GetLogNumber());
     list.push_back(m->NewIterator());
   }
-  Iterator* iter = NewMergingIterator(&internal_comparator_, &list[0],
+  Iterator* iter = NewMergingIterator(env_, &internal_comparator_, &list[0],
                                       list.size());
   const SequenceNumber newest_snapshot = snapshots_.GetNewest();
   const SequenceNumber earliest_seqno_in_memtable =
@@ -2519,7 +2519,7 @@ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
   // Collect iterators for files in L0 - Ln
   versions_->current()->AddIterators(options, storage_options_, &list);
   Iterator* internal_iter =
-      NewMergingIterator(&internal_comparator_, &list[0], list.size());
+      NewMergingIterator(env_, &internal_comparator_, &list[0], list.size());
   versions_->current()->Ref();

   cleanup->mu = &mutex_;
@@ -2555,6 +2555,8 @@ Status DBImpl::GetImpl(const ReadOptions& options,
   Status s;

   StopWatch sw(env_, options_.statistics, DB_GET);
+  StopWatchNano snapshot_timer(env_, false);
+  StartPerfTimer(&snapshot_timer);
   SequenceNumber snapshot;
   mutex_.Lock();
   if (options.snapshot != nullptr) {
@@ -2583,15 +2585,23 @@ Status DBImpl::GetImpl(const ReadOptions& options,
   // s is both in/out. When in, s could either be OK or MergeInProgress.
   // merge_operands will contain the sequence of merges in the latter case.
   LookupKey lkey(key, snapshot);
+  BumpPerfTime(&perf_context.get_snapshot_time, &snapshot_timer);
   if (mem->Get(lkey, value, &s, &merge_operands, options_)) {
     // Done
   } else if (imm.Get(lkey, value, &s, &merge_operands, options_)) {
     // Done
   } else {
+    StopWatchNano from_files_timer(env_, false);
+    StartPerfTimer(&from_files_timer);
+
     current->Get(options, lkey, value, &s, &merge_operands, &stats,
                  options_, value_found);
     have_stat_update = true;
+    BumpPerfTime(&perf_context.get_from_output_files_time, &from_files_timer);
   }
+
+  StopWatchNano post_process_timer(env_, false);
+  StartPerfTimer(&post_process_timer);
   mutex_.Lock();

   if (!options_.disable_seek_compaction &&
@@ -2607,6 +2617,8 @@ Status DBImpl::GetImpl(const ReadOptions& options,
   // Note, tickers are atomic now - no lock protection needed any more.
   RecordTick(options_.statistics, NUMBER_KEYS_READ);
   RecordTick(options_.statistics, BYTES_READ, value->size());
+  BumpPerfTime(&perf_context.get_post_process_time, &post_process_timer);
+
   return s;
 }

@@ -2615,6 +2627,8 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
                                      std::vector<std::string>* values) {
   StopWatch sw(env_, options_.statistics, DB_MULTIGET);
+  StopWatchNano snapshot_timer(env_, false);
+  StartPerfTimer(&snapshot_timer);
   SequenceNumber snapshot;
   mutex_.Lock();
   if (options.snapshot != nullptr) {
@@ -2646,6 +2660,7 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,

   // Keep track of bytes that we read for statistics-recording later
   uint64_t bytesRead = 0;
+  BumpPerfTime(&perf_context.get_snapshot_time, &snapshot_timer);

   // For each of the given keys, apply the entire "get" process as follows:
   // First look in the memtable, then in the immutable memtable (if any).
@@ -2672,6 +2687,8 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
   }

   // Post processing (decrement reference counts and record statistics)
+  StopWatchNano post_process_timer(env_, false);
+  StartPerfTimer(&post_process_timer);
   mutex_.Lock();
   if (!options_.disable_seek_compaction &&
       have_stat_update && current->UpdateStats(stats)) {
@@ -2686,6 +2703,7 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
   RecordTick(options_.statistics, NUMBER_MULTIGET_CALLS);
   RecordTick(options_.statistics, NUMBER_MULTIGET_KEYS_READ, numKeys);
   RecordTick(options_.statistics, NUMBER_MULTIGET_BYTES_READ, bytesRead);
+  BumpPerfTime(&perf_context.get_post_process_time, &post_process_timer);

   return statList;
 }
@@ -2754,6 +2772,8 @@ Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
 }

 Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
+  StopWatchNano pre_post_process_timer(env_, false);
+  StartPerfTimer(&pre_post_process_timer);
   Writer w(&mutex_);
   w.batch = my_batch;
   w.sync = options.sync;
@@ -2800,12 +2820,13 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
     if (options.disableWAL) {
       flush_on_destroy_ = true;
     }
+    BumpPerfTime(&perf_context.write_pre_and_post_process_time,
+                 &pre_post_process_timer);

     if (!options.disableWAL) {
       StopWatchNano timer(env_);
       StartPerfTimer(&timer);
       status = log_->AddRecord(WriteBatchInternal::Contents(updates));
-      BumpPerfTime(&perf_context.wal_write_time, &timer);
       if (status.ok() && options.sync) {
         if (options_.use_fsync) {
           StopWatch(env_, options_.statistics, WAL_FILE_SYNC_MICROS);
@@ -2815,10 +2836,14 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
           status = log_->file()->Sync();
         }
       }
+      BumpPerfTime(&perf_context.write_wal_time, &timer);
     }
     if (status.ok()) {
+      StopWatchNano write_memtable_timer(env_, false);
+      StartPerfTimer(&write_memtable_timer);
       status = WriteBatchInternal::InsertInto(updates, mem_, &options_, this,
                                               options_.filter_deletes);
+      BumpPerfTime(&perf_context.write_memtable_time, &write_memtable_timer);
       if (!status.ok()) {
         // Panic for in-memory corruptions
         // Note that existing logic was not sound. Any partial failure writing
@@ -2828,6 +2853,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
       }
       SetTickerCount(options_.statistics, SEQUENCE_NUMBER, last_sequence);
     }
+    StartPerfTimer(&pre_post_process_timer);
     LogFlush(options_.info_log);
     mutex_.Lock();
     if (status.ok()) {
@@ -2855,6 +2881,8 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
   if (!writers_.empty()) {
     writers_.front()->cv.Signal();
   }
+  BumpPerfTime(&perf_context.write_pre_and_post_process_time,
+               &pre_post_process_timer);
   return status;
 }
diff --git a/db/db_iter.cc b/db/db_iter.cc
index 7a395db7aae64b92dc97ef64bcc1b274b52c70b2..9187313f2598cef342f409277a6988753acca097 100644
--- a/db/db_iter.cc
+++ b/db/db_iter.cc
@@ -102,7 +102,8 @@ class DBIter: public Iterator {
   virtual void SeekToLast();

  private:
-  void FindNextUserEntry(bool skipping);
+  inline void FindNextUserEntry(bool skipping);
+  void FindNextUserEntryInternal(bool skipping);
   void FindPrevUserEntry();
   bool ParseKey(ParsedInternalKey* key);
   void MergeValuesNewToOld();
@@ -191,7 +192,15 @@ void DBIter::Next() {
 //
 // NOTE: In between, saved_key_ can point to a user key that has
 // a delete marker
-void DBIter::FindNextUserEntry(bool skipping) {
+inline void DBIter::FindNextUserEntry(bool skipping) {
+  StopWatchNano timer(env_, false);
+  StartPerfTimer(&timer);
+  FindNextUserEntryInternal(skipping);
+  BumpPerfTime(&perf_context.find_next_user_entry_time, &timer);
+}
+
+// Actual implementation of DBIter::FindNextUserEntry()
+void DBIter::FindNextUserEntryInternal(bool skipping) {
   // Loop until we hit an acceptable entry to yield
   assert(iter_->Valid());
   assert(direction_ == kForward);
@@ -429,7 +438,10 @@ void DBIter::Seek(const Slice& target) {
   saved_key_.clear();
   AppendInternalKey(
       &saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek));
+  StopWatchNano internal_seek_timer(env_, false);
+  StartPerfTimer(&internal_seek_timer);
   iter_->Seek(saved_key_);
+  BumpPerfTime(&perf_context.seek_internal_seek_time, &internal_seek_timer);
   if (iter_->Valid()) {
     direction_ = kForward;
     ClearSavedValue();
@@ -442,7 +454,10 @@ void DBIter::Seek(const Slice& target) {
 void DBIter::SeekToFirst() {
   direction_ = kForward;
   ClearSavedValue();
+  StopWatchNano internal_seek_timer(env_, false);
+  StartPerfTimer(&internal_seek_timer);
   iter_->SeekToFirst();
+  BumpPerfTime(&perf_context.seek_internal_seek_time, &internal_seek_timer);
   if (iter_->Valid()) {
     FindNextUserEntry(false /* not skipping */);
   } else {
@@ -461,7 +476,10 @@ void DBIter::SeekToLast() {

   direction_ = kReverse;
   ClearSavedValue();
+  StopWatchNano internal_seek_timer(env_, false);
+  StartPerfTimer(&internal_seek_timer);
   iter_->SeekToLast();
+  BumpPerfTime(&perf_context.seek_internal_seek_time, &internal_seek_timer);
   FindPrevUserEntry();
 }
diff --git a/db/db_test.cc b/db/db_test.cc
index aca07bcff3d3a2090764537dc2f40be98d281f82..ed7425521070d19d9a0f3bdb3584ddfe90c3a315 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -22,6 +22,7 @@
 #include "rocksdb/compaction_filter.h"
 #include "rocksdb/env.h"
 #include "rocksdb/table.h"
+#include "rocksdb/perf_context.h"
 #include "util/hash.h"
 #include "util/logging.h"
 #include "util/mutexlock.h"
@@ -1215,7 +1216,13 @@ TEST(DBTest, IterMulti) {
   ASSERT_EQ(IterStatus(iter), "a->va");
   iter->Seek("ax");
   ASSERT_EQ(IterStatus(iter), "b->vb");
+
+  SetPerfLevel(kEnableTime);
+  perf_context.Reset();
   iter->Seek("b");
+  ASSERT_TRUE((int) perf_context.seek_internal_seek_time > 0);
+  ASSERT_TRUE((int) perf_context.find_next_user_entry_time > 0);
+  SetPerfLevel(kDisable);
   ASSERT_EQ(IterStatus(iter), "b->vb");
   iter->Seek("z");
   ASSERT_EQ(IterStatus(iter), "(invalid)");
@@ -1230,7 +1237,12 @@ TEST(DBTest, IterMulti) {
   // Switch from forward to reverse
   iter->SeekToFirst();
   iter->Next();
+  SetPerfLevel(kEnableTime);
+  perf_context.Reset();
   iter->Next();
+  ASSERT_EQ(0, (int) perf_context.seek_internal_seek_time);
+  ASSERT_TRUE((int) perf_context.find_next_user_entry_time > 0);
+  SetPerfLevel(kDisable);
   iter->Prev();
   ASSERT_EQ(IterStatus(iter), "b->vb");
@@ -1590,22 +1602,42 @@ TEST(DBTest, NumImmutableMemTable) {
     std::string big_value(1000000, 'x');
     std::string num;
+    SetPerfLevel(kEnableTime);

     ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value));
     ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
     ASSERT_EQ(num, "0");
+    perf_context.Reset();
+    Get("k1");
+    ASSERT_EQ(1, (int) perf_context.get_from_memtable_count);

     ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value));
     ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
     ASSERT_EQ(num, "1");
+    perf_context.Reset();
+    Get("k1");
+    ASSERT_EQ(2, (int) perf_context.get_from_memtable_count);
+    perf_context.Reset();
+    Get("k2");
+    ASSERT_EQ(1, (int) perf_context.get_from_memtable_count);

     ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value));
     ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
     ASSERT_EQ(num, "2");
+    perf_context.Reset();
+    Get("k2");
+    ASSERT_EQ(2, (int) perf_context.get_from_memtable_count);
+    perf_context.Reset();
+    Get("k3");
+    ASSERT_EQ(1, (int) perf_context.get_from_memtable_count);
+    perf_context.Reset();
+    Get("k1");
+    ASSERT_EQ(3, (int) perf_context.get_from_memtable_count);

     dbfull()->Flush(FlushOptions());
     ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
     ASSERT_EQ(num, "0");
+    SetPerfLevel(kDisable);
   } while (ChangeCompactOptions());
 }
@@ -1614,11 +1646,16 @@ TEST(DBTest, FLUSH) {
     Options options = CurrentOptions();
     WriteOptions writeOpt = WriteOptions();
     writeOpt.disableWAL = true;
+    SetPerfLevel(kEnableTime);
    ASSERT_OK(dbfull()->Put(writeOpt, "foo", "v1"));
     // this will now also flush the last 2 writes
     dbfull()->Flush(FlushOptions());
     ASSERT_OK(dbfull()->Put(writeOpt, "bar", "v1"));

+    perf_context.Reset();
+    Get("foo");
+    ASSERT_TRUE((int) perf_context.get_from_output_files_time > 0);
+
     Reopen();
     ASSERT_EQ("v1", Get("foo"));
     ASSERT_EQ("v1", Get("bar"));
@@ -1630,7 +1667,9 @@ TEST(DBTest, FLUSH) {
     Reopen();
     ASSERT_EQ("v2", Get("bar"));
+    perf_context.Reset();
     ASSERT_EQ("v2", Get("foo"));
+    ASSERT_TRUE((int) perf_context.get_from_output_files_time > 0);

     writeOpt.disableWAL = false;
     ASSERT_OK(dbfull()->Put(writeOpt, "bar", "v3"));
@@ -1642,6 +1681,8 @@ TEST(DBTest, FLUSH) {
     // has WAL enabled.
ASSERT_EQ("v3", Get("foo")); ASSERT_EQ("v3", Get("bar")); + + SetPerfLevel(kDisable); } while (ChangeCompactOptions()); } diff --git a/db/memtable.cc b/db/memtable.cc index 44eb160e7ba89411b11dce9270af6c7770a1a963..dce0c382f40d4d3952d2c7044dba9f33d341c870 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -19,6 +19,8 @@ #include "util/coding.h" #include "util/mutexlock.h" #include "util/murmurhash.h" +#include "util/perf_context_imp.h" +#include "util/stop_watch.h" namespace std { template <> @@ -161,6 +163,9 @@ void MemTable::Add(SequenceNumber s, ValueType type, bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, std::deque* operands, const Options& options) { + StopWatchNano memtable_get_timer(options.env, false); + StartPerfTimer(&memtable_get_timer); + Slice memkey = key.memtable_key(); std::shared_ptr iter( table_->GetIterator(key.user_key())); @@ -174,7 +179,8 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, auto logger = options.info_log; std::string merge_result; - for (; iter->Valid(); iter->Next()) { + bool found_final_value = false; + for (; !found_final_value && iter->Valid(); iter->Next()) { // entry format is: // klength varint32 // userkey char[klength-8] @@ -211,7 +217,8 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, if (options.inplace_update_support) { GetLock(key.user_key())->Unlock(); } - return true; + found_final_value = true; + break; } case kTypeDeletion: { if (merge_in_progress) { @@ -225,7 +232,8 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, } else { *s = Status::NotFound(Slice()); } - return true; + found_final_value = true; + break; } case kTypeMerge: { Slice v = GetLengthPrefixedSlice(key_ptr + key_length); @@ -259,10 +267,12 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, // No change to value, since we have not yet found a Put/Delete - if (merge_in_progress) { + if (!found_final_value && merge_in_progress) { *s = Status::MergeInProgress(""); } - return false; + BumpPerfTime(&perf_context.get_from_memtable_time, &memtable_get_timer); + BumpPerfCount(&perf_context.get_from_memtable_count); + return found_final_value; } bool MemTable::Update(SequenceNumber seq, ValueType type, diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc index 05416748dc7550e2d7a2ae2ef2681805897c6dc7..2a6e6b7e48b893f831abacc8f20a2a2a3a96ce42 100644 --- a/db/perf_context_test.cc +++ b/db/perf_context_test.cc @@ -174,6 +174,13 @@ void ProfileKeyComparison() { HistogramImpl hist_put; HistogramImpl hist_get; + HistogramImpl hist_get_snapshot; + HistogramImpl hist_get_memtable; + HistogramImpl hist_get_post_process; + HistogramImpl hist_num_memtable_checked; + HistogramImpl hist_write_pre_post; + HistogramImpl hist_write_wal_time; + HistogramImpl hist_write_memtable_time; std::cout << "Inserting " << FLAGS_total_keys << " key/value pairs\n...\n"; @@ -192,16 +199,37 @@ void ProfileKeyComparison() { perf_context.Reset(); db->Put(write_options, key, value); + hist_write_pre_post.Add(perf_context.write_pre_and_post_process_time); + hist_write_wal_time.Add(perf_context.write_wal_time); + hist_write_memtable_time.Add(perf_context.write_memtable_time); hist_put.Add(perf_context.user_key_comparison_count); perf_context.Reset(); db->Get(read_options, key, &value); + hist_get_snapshot.Add(perf_context.get_snapshot_time); + hist_get_memtable.Add(perf_context.get_from_memtable_time); + hist_num_memtable_checked.Add(perf_context.get_from_memtable_count); + 
hist_get_post_process.Add(perf_context.get_post_process_time); hist_get.Add(perf_context.user_key_comparison_count); } std::cout << "Put uesr key comparison: \n" << hist_put.ToString() << "Get uesr key comparison: \n" << hist_get.ToString(); - + std::cout << "Put(): Pre and Post Process Time: \n" + << hist_write_pre_post.ToString() + << " Writing WAL time: \n" + << hist_write_wal_time.ToString() << "\n" + << " Writing Mem Table time: \n" + << hist_write_memtable_time.ToString() << "\n"; + + std::cout << "Get(): Time to get snapshot: \n" + << hist_get_snapshot.ToString() + << " Time to get value from memtables: \n" + << hist_get_memtable.ToString() << "\n" + << " Number of memtables checked: \n" + << hist_num_memtable_checked.ToString() << "\n" + << " Time to post process: \n" + << hist_get_post_process.ToString() << "\n"; } TEST(PerfContextTest, KeyComparisonCount) { @@ -259,8 +287,8 @@ TEST(PerfContextTest, SeekKeyComparison) { db->Put(write_options, key, value); auto put_time = timer.ElapsedNanos(); hist_put_time.Add(put_time); - hist_wal_time.Add(perf_context.wal_write_time); - hist_time_diff.Add(put_time - perf_context.wal_write_time); + hist_wal_time.Add(perf_context.write_wal_time); + hist_time_diff.Add(put_time - perf_context.write_wal_time); } std::cout << "Put time:\n" << hist_put_time.ToString() diff --git a/db/version_set.cc b/db/version_set.cc index d554657b4652ca3959fbdadfa69cee44007339ed..349abfbaab127c9ab0d2da4561a2473b40e7235d 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2091,7 +2091,7 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) { } } assert(num <= space); - Iterator* result = NewMergingIterator(&icmp_, list, num); + Iterator* result = NewMergingIterator(env_, &icmp_, list, num); delete[] list; return result; } diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h index 9e900e05010a3169037a75a47ed261d4a3c607db..551ca8fe667ba56930eb6fed07504183f11030cb 100644 --- a/include/rocksdb/perf_context.h +++ b/include/rocksdb/perf_context.h @@ -38,7 +38,27 @@ struct PerfContext { uint64_t internal_key_skipped_count; // total number of deletes skipped over during iteration uint64_t internal_delete_skipped_count; - uint64_t wal_write_time; // total time spent on writing to WAL + + uint64_t get_snapshot_time; // total time spent on getting snapshot + uint64_t get_from_memtable_time; // total time spent on querying memtables + uint64_t get_from_memtable_count; // number of mem tables queried + // total time spent after Get() finds a key + uint64_t get_post_process_time; + uint64_t get_from_output_files_time; // total time reading from output files + // total time spent on seeking child iters + uint64_t seek_child_seek_time; + // number of seek issued in child iterators + uint64_t seek_child_seek_count; + uint64_t seek_min_heap_time; // total time spent on the merge heap + // total time spent on seeking the internal entries + uint64_t seek_internal_seek_time; + // total time spent on iterating internal entries to find the next user entry + uint64_t find_next_user_entry_time; + // total time spent on pre or post processing when writing a record + uint64_t write_pre_and_post_process_time; + uint64_t write_wal_time; // total time spent on writing to WAL + // total time spent on writing to mem tables + uint64_t write_memtable_time; }; extern __thread PerfContext perf_context; diff --git a/table/merger.cc b/table/merger.cc index 89faf402becab082c3ce673b6bc8f5eccd35fe57..1aed00cc5c78005bb9687208b2d7ed7b097fcab8 100644 --- a/table/merger.cc +++ 
b/table/merger.cc @@ -11,8 +11,11 @@ #include "rocksdb/comparator.h" #include "rocksdb/iterator.h" +#include "rocksdb/options.h" #include "table/iter_heap.h" #include "table/iterator_wrapper.h" +#include "util/stop_watch.h" +#include "util/perf_context_imp.h" #include @@ -22,11 +25,13 @@ namespace { class MergingIterator : public Iterator { public: - MergingIterator(const Comparator* comparator, Iterator** children, int n) + MergingIterator(Env* const env, const Comparator* comparator, + Iterator** children, int n) : comparator_(comparator), children_(n), current_(nullptr), use_heap_(true), + env_(env), direction_(kForward), maxHeap_(NewMaxIterHeap(comparator_)), minHeap_ (NewMinIterHeap(comparator_)) { @@ -74,8 +79,14 @@ class MergingIterator : public Iterator { // Invalidate the heap. use_heap_ = false; IteratorWrapper* first_child = nullptr; + StopWatchNano child_seek_timer(env_, false); + StopWatchNano min_heap_timer(env_, false); for (auto& child : children_) { + StartPerfTimer(&child_seek_timer); child.Seek(target); + BumpPerfTime(&perf_context.seek_child_seek_time, &child_seek_timer); + BumpPerfCount(&perf_context.seek_child_seek_count); + if (child.Valid()) { // This child has valid key if (!use_heap_) { @@ -86,24 +97,31 @@ class MergingIterator : public Iterator { } else { // We have more than one children with valid keys. Initialize // the heap and put the first child into the heap. + StartPerfTimer(&min_heap_timer); ClearHeaps(); + BumpPerfTime(&perf_context.seek_min_heap_time, &child_seek_timer); + StartPerfTimer(&min_heap_timer); minHeap_.push(first_child); + BumpPerfTime(&perf_context.seek_min_heap_time, &child_seek_timer); } } if (use_heap_) { + StartPerfTimer(&min_heap_timer); minHeap_.push(&child); + BumpPerfTime(&perf_context.seek_min_heap_time, &child_seek_timer); } } } if (use_heap_) { // If heap is valid, need to put the smallest key to curent_. + StartPerfTimer(&min_heap_timer); FindSmallest(); + BumpPerfTime(&perf_context.seek_min_heap_time, &child_seek_timer); } else { // The heap is not valid, then the current_ iterator is the first // one, or null if there is no first child. current_ = first_child; } - direction_ = kForward; } virtual void Next() { @@ -211,6 +229,7 @@ class MergingIterator : public Iterator { // contain valid rows. If it is false, only current_ can possibly contain // valid rows. bool use_heap_; + Env* const env_; // Which direction is the iterator moving? enum Direction { kForward, @@ -250,14 +269,15 @@ void MergingIterator::ClearHeaps() { } } // namespace -Iterator* NewMergingIterator(const Comparator* cmp, Iterator** list, int n) { +Iterator* NewMergingIterator(Env* const env, const Comparator* cmp, + Iterator** list, int n) { assert(n >= 0); if (n == 0) { return NewEmptyIterator(); } else if (n == 1) { return list[0]; } else { - return new MergingIterator(cmp, list, n); + return new MergingIterator(env, cmp, list, n); } } diff --git a/table/merger.h b/table/merger.h index dbc1f69eb5e0c0b7c1cb7f5bf02f22d12cca3050..74f46ac9b24df83018918ea78dfdb2dc45f7e22d 100644 --- a/table/merger.h +++ b/table/merger.h @@ -13,6 +13,7 @@ namespace rocksdb { class Comparator; class Iterator; +class Env; // Return an iterator that provided the union of the data in // children[0,n-1]. 
Takes ownership of the child iterators and @@ -23,6 +24,6 @@ class Iterator; // // REQUIRES: n >= 0 extern Iterator* NewMergingIterator( - const Comparator* comparator, Iterator** children, int n); + Env* const env, const Comparator* comparator, Iterator** children, int n); } // namespace rocksdb diff --git a/util/perf_context.cc b/util/perf_context.cc index 1e8ddfb5ea459cba1f33900d999c29e232f0cecd..6833f6836a2e35aa891d6826402e3aa8a8642c98 100644 --- a/util/perf_context.cc +++ b/util/perf_context.cc @@ -22,7 +22,20 @@ void PerfContext::Reset() { block_decompress_time = 0; internal_key_skipped_count = 0; internal_delete_skipped_count = 0; - wal_write_time = 0; + write_wal_time = 0; + + get_snapshot_time = 0; + get_from_memtable_time = 0; + get_from_memtable_count = 0; + get_post_process_time = 0; + get_from_output_files_time = 0; + seek_child_seek_time = 0; + seek_child_seek_count = 0; + seek_min_heap_time = 0; + seek_internal_seek_time = 0; + find_next_user_entry_time = 0; + write_pre_and_post_process_time = 0; + write_memtable_time = 0; } __thread PerfContext perf_context;
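
Usage note (not part of the patch): the pattern exercised by the new db_test.cc and perf_context_test.cc checks above can be condensed into a small standalone program. This is a hedged sketch that assumes a build containing this change, and that SetPerfLevel(), the PerfLevel enum, and the thread-local perf_context are visible through rocksdb/perf_context.h, as the includes added to db_bench.cc and db_test.cc suggest; the database path and keys are placeholders.

// Sketch only: reads back the counters introduced by this patch.
#include <iostream>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/perf_context.h"  // SetPerfLevel(), perf_context (per this patch)

int main() {
  rocksdb::DB* db = nullptr;
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/perf_context_demo", &db);
  if (!s.ok()) {
    return 1;
  }

  // Enable timing collection, as the new db_bench --perf_level flag does.
  rocksdb::SetPerfLevel(rocksdb::kEnableTime);

  rocksdb::perf_context.Reset();
  db->Put(rocksdb::WriteOptions(), "key", "value");
  std::cout << "write_wal_time=" << rocksdb::perf_context.write_wal_time
            << " write_memtable_time=" << rocksdb::perf_context.write_memtable_time
            << " write_pre_and_post_process_time="
            << rocksdb::perf_context.write_pre_and_post_process_time << std::endl;

  rocksdb::perf_context.Reset();
  std::string value;
  db->Get(rocksdb::ReadOptions(), "key", &value);
  std::cout << "get_from_memtable_time=" << rocksdb::perf_context.get_from_memtable_time
            << " get_from_memtable_count=" << rocksdb::perf_context.get_from_memtable_count
            << " get_post_process_time=" << rocksdb::perf_context.get_post_process_time
            << std::endl;

  rocksdb::SetPerfLevel(rocksdb::kDisable);  // stop collecting
  delete db;
  return 0;
}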