diff --git a/db/prefix_test.cc b/db/prefix_test.cc index 6c7fc1697fb11795ce9dc75471852de1233d5b6b..4b15e63e3ddf70d148c2c7a9c5a5ef4dcf0a9050 100644 --- a/db/prefix_test.cc +++ b/db/prefix_test.cc @@ -22,6 +22,7 @@ DEFINE_uint64(items_per_prefix, 10, "total number of values per prefix"); DEFINE_int64(write_buffer_size, 1000000000, ""); DEFINE_int64(max_write_buffer_number, 8, ""); DEFINE_int64(min_write_buffer_number_to_merge, 7, ""); +DEFINE_int32(skiplist_height, 4, ""); // Path to the database on file system const std::string kDbName = rocksdb::test::TmpDir() + "/prefix_test"; @@ -111,7 +112,8 @@ class PrefixTest { options.prefix_extractor = prefix_extractor; if (FLAGS_use_nolock_version) { options.memtable_factory.reset(NewHashSkipListRepFactory( - prefix_extractor, FLAGS_bucket_count)); + prefix_extractor, FLAGS_bucket_count, + FLAGS_skiplist_height)); } else { options.memtable_factory = std::make_shared( @@ -152,7 +154,7 @@ TEST(PrefixTest, DynamicPrefixIterator) { TestKey test_key(prefix, sorted); Slice key = TestKeyToSlice(test_key); - std::string value = "v" + std::to_string(sorted); + std::string value(40, 0); ASSERT_OK(db->Put(write_options, key, value)); } diff --git a/db/skiplist.h b/db/skiplist.h index 06a35d911577a6d26f1ce060055f15d8b97e44e9..54b4f74467b048fafe249bea6ee2ebe66732d5c3 100644 --- a/db/skiplist.h +++ b/db/skiplist.h @@ -47,7 +47,8 @@ class SkipList { // Create a new SkipList object that will use "cmp" for comparing keys, // and will allocate memory using "*arena". Objects allocated in the arena // must remain allocated for the lifetime of the skiplist object. - explicit SkipList(Comparator cmp, Arena* arena); + explicit SkipList(Comparator cmp, Arena* arena, + int32_t max_height = 12, int32_t branching_factor = 4); // Insert key into the list. // REQUIRES: nothing that compares equal to key is currently in the list. 
@@ -101,7 +102,8 @@ class SkipList { }; private: - enum { kMaxHeight = 12 }; + const int32_t kMaxHeight_; + const int32_t kBranching_; // Immutable after construction Comparator const compare_; @@ -114,8 +116,8 @@ class SkipList { port::AtomicPointer max_height_; // Height of the entire list // Used for optimizing sequential insert patterns - Node* prev_[kMaxHeight]; - int prev_height_; + Node** prev_; + int32_t prev_height_; inline int GetMaxHeight() const { return static_cast( @@ -257,13 +259,12 @@ inline void SkipList::Iterator::SeekToLast() { template int SkipList::RandomHeight() { // Increase height with probability 1 in kBranching - static const unsigned int kBranching = 4; int height = 1; - while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) { + while (height < kMaxHeight_ && ((rnd_.Next() % kBranching_) == 0)) { height++; } assert(height > 0); - assert(height <= kMaxHeight); + assert(height <= kMaxHeight_); return height; } @@ -353,14 +354,24 @@ typename SkipList::Node* SkipList::FindLast() } template -SkipList::SkipList(Comparator cmp, Arena* arena) - : compare_(cmp), +SkipList::SkipList(Comparator cmp, Arena* arena, + int32_t max_height, + int32_t branching_factor) + : kMaxHeight_(max_height), + kBranching_(branching_factor), + compare_(cmp), arena_(arena), - head_(NewNode(0 /* any key will do */, kMaxHeight)), + head_(NewNode(0 /* any key will do */, max_height)), max_height_(reinterpret_cast(1)), prev_height_(1), rnd_(0xdeadbeef) { - for (int i = 0; i < kMaxHeight; i++) { + assert(kMaxHeight_ > 0); + assert(kBranching_ > 0); + // Allocate the prev_ Node* array, directly from the passed-in arena. + // prev_ does not need to be freed, as its life cycle is tied up with + // the arena as a whole. 
+ prev_ = (Node**) arena_->AllocateAligned(sizeof(Node*) * kMaxHeight_); for (int i = 0; i < kMaxHeight_; i++) { head_->SetNext(i, nullptr); prev_[i] = head_; } diff --git a/include/rocksdb/memtablerep.h b/include/rocksdb/memtablerep.h index 83b15d2565a5d9e7181a9ec612772568a3749e6a..53a7f506491e4e31dd6239a884f541c34589a37d 100644 --- a/include/rocksdb/memtablerep.h +++ b/include/rocksdb/memtablerep.h @@ -267,9 +267,16 @@ public: // The same as TransformRepFactory except it doesn't use locks. // Experimental, will replace TransformRepFactory once we are sure -// it performs better +// it performs better. It contains a fixed array of buckets, each +// pointing to a skiplist (null if the bucket is empty). +// bucket_count: number of fixed array buckets +// skiplist_height: the max height of the skiplist +// skiplist_branching_factor: probabilistic size ratio between adjacent +// linked lists in the skiplist extern MemTableRepFactory* NewHashSkipListRepFactory( - const SliceTransform* transform, size_t bucket_count = 1000000); + const SliceTransform* transform, size_t bucket_count = 1000000, + int32_t skiplist_height = 4, int32_t skiplist_branching_factor = 4 +); } diff --git a/util/hash_skiplist_rep.cc b/util/hash_skiplist_rep.cc index 290ce9d6342584d84dd1a51dcd1595533f5f6e0b..bcc459f6622b3422bd9a7d8bdac4032c0803cb27 100644 --- a/util/hash_skiplist_rep.cc +++ b/util/hash_skiplist_rep.cc @@ -20,7 +20,8 @@ namespace { class HashSkipListRep : public MemTableRep { public: HashSkipListRep(MemTableRep::KeyComparator& compare, Arena* arena, - const SliceTransform* transform, size_t bucket_size); + const SliceTransform* transform, size_t bucket_size, + int32_t skiplist_height, int32_t skiplist_branching_factor); virtual void Insert(const char* key) override; @@ -47,6 +48,9 @@ class HashSkipListRep : public MemTableRep { size_t bucket_size_; + const int32_t skiplist_height_; + const int32_t skiplist_branching_factor_; + // Maps slices (which are transformed user keys) to 
buckets of keys sharing // the same transform. port::AtomicPointer* buckets_; @@ -215,8 +219,12 @@ class HashSkipListRep : public MemTableRep { }; HashSkipListRep::HashSkipListRep(MemTableRep::KeyComparator& compare, - Arena* arena, const SliceTransform* transform, size_t bucket_size) + Arena* arena, const SliceTransform* transform, + size_t bucket_size, int32_t skiplist_height, + int32_t skiplist_branching_factor) : bucket_size_(bucket_size), + skiplist_height_(skiplist_height), + skiplist_branching_factor_(skiplist_branching_factor), transform_(transform), compare_(compare), arena_(arena), @@ -239,7 +247,8 @@ HashSkipListRep::Bucket* HashSkipListRep::GetInitializedBucket( auto bucket = GetBucket(hash); if (bucket == nullptr) { auto addr = arena_->AllocateAligned(sizeof(Bucket)); - bucket = new (addr) Bucket(compare_, arena_); + bucket = new (addr) Bucket(compare_, arena_, skiplist_height_, + skiplist_branching_factor_); buckets_[hash].Release_Store(static_cast(bucket)); } return bucket; @@ -302,17 +311,23 @@ std::shared_ptr class HashSkipListRepFactory : public MemTableRepFactory { public: - explicit HashSkipListRepFactory(const SliceTransform* transform, - size_t bucket_count = 1000000) - : transform_(transform), - bucket_count_(bucket_count) { } + explicit HashSkipListRepFactory( + const SliceTransform* transform, + size_t bucket_count, + int32_t skiplist_height, + int32_t skiplist_branching_factor) + : transform_(transform), + bucket_count_(bucket_count), + skiplist_height_(skiplist_height), + skiplist_branching_factor_(skiplist_branching_factor) { } virtual ~HashSkipListRepFactory() { delete transform_; } virtual std::shared_ptr CreateMemTableRep( MemTableRep::KeyComparator& compare, Arena* arena) override { return std::make_shared(compare, arena, transform_, - bucket_count_); + bucket_count_, skiplist_height_, + skiplist_branching_factor_); } virtual const char* Name() const override { @@ -324,11 +339,15 @@ class HashSkipListRepFactory : public 
MemTableRepFactory { private: const SliceTransform* transform_; const size_t bucket_count_; + const int32_t skiplist_height_; + const int32_t skiplist_branching_factor_; }; MemTableRepFactory* NewHashSkipListRepFactory( - const SliceTransform* transform, size_t bucket_count) { - return new HashSkipListRepFactory(transform, bucket_count); + const SliceTransform* transform, size_t bucket_count, + int32_t skiplist_height, int32_t skiplist_branching_factor) { + return new HashSkipListRepFactory(transform, bucket_count, + skiplist_height, skiplist_branching_factor); } } // namespace rocksdb