Commit e3584f9c authored by gabor@google.com

Bugfix for issue 33; reduce lock contention in Get(), parallel benchmarks.

- Fix for issue 33 (non-null-terminated result from
  leveldb_property_value())

- Support for running multiple instances of a benchmark in parallel.

- Reduce lock contention on Get():
  (1) Do not hold the lock while searching memtables.
  (2) Shard block and table caches 16-ways.

  Benchmark for evaluating this change:
  $ db_bench --benchmarks=fillseq1,readrandom --threads=$n
  (fillseq1 is a small hack to make sure fillseq runs once regardless
  of number of threads specified on the command line).



git-svn-id: https://leveldb.googlecode.com/svn/trunk@49 62dab493-f737-651d-591e-8d6aee1b9529
Parent ab323f7e
...
@@ -196,7 +196,8 @@ char* leveldb_property_value(
     const char* propname) {
   std::string tmp;
   if (db->rep->GetProperty(Slice(propname), &tmp)) {
-    return CopyString(tmp);
+    // We use strdup() since we expect human readable output.
+    return strdup(tmp.c_str());
   } else {
     return NULL;
   }
...
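For context, a minimal caller-side sketch (not part of this change) of the C API being fixed: with the strdup() change the returned property string is a NUL-terminated, malloc()-allocated copy, so the caller can print it directly and release it with free(). The property name and the PrintStats wrapper below are illustrative assumptions, not code from this commit.

// Hypothetical caller, assuming the public C API declared in include/leveldb/c.h.
#include <cstdio>
#include <cstdlib>
#include "leveldb/c.h"

void PrintStats(leveldb_t* db) {
  // After this fix the result is a NUL-terminated copy made by strdup().
  char* value = leveldb_property_value(db, "leveldb.stats");
  if (value != NULL) {
    std::printf("%s\n", value);
    std::free(value);  // strdup() allocates with malloc(), so free() releases it
  }
}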
This diff is collapsed.
...
@@ -989,27 +989,37 @@ Status DBImpl::Get(const ReadOptions& options,
     snapshot = versions_->LastSequence();
   }
 
-  // First look in the memtable, then in the immutable memtable (if any).
-  LookupKey lkey(key, snapshot);
-  if (mem_->Get(lkey, value, &s)) {
-    return s;
-  }
-  if (imm_ != NULL && imm_->Get(lkey, value, &s)) {
-    return s;
-  }
-
-  // Not in memtable(s); try live files in level order
+  MemTable* mem = mem_;
+  MemTable* imm = imm_;
   Version* current = versions_->current();
+  mem->Ref();
+  if (imm != NULL) imm->Ref();
   current->Ref();
+
+  bool have_stat_update = false;
   Version::GetStats stats;
-  { // Unlock while reading from files
+
+  // Unlock while reading from files and memtables
+  {
     mutex_.Unlock();
-    s = current->Get(options, lkey, value, &stats);
+    // First look in the memtable, then in the immutable memtable (if any).
+    LookupKey lkey(key, snapshot);
+    if (mem_->Get(lkey, value, &s)) {
+      // Done
+    } else if (imm_ != NULL && imm_->Get(lkey, value, &s)) {
+      // Done
+    } else {
+      s = current->Get(options, lkey, value, &stats);
+      have_stat_update = true;
+    }
     mutex_.Lock();
   }
-  if (current->UpdateStats(stats)) {
+
+  if (have_stat_update && current->UpdateStats(stats)) {
     MaybeScheduleCompaction();
   }
+  mem->Unref();
+  if (imm != NULL) imm->Unref();
   current->Unref();
   return s;
 }
...
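The key idea in the Get() change above: the memtable pointers are captured and Ref()'d while the mutex is still held, so the lock can be released during the slow lookups without a concurrent compaction deleting those tables; Unref() happens after the lock is re-acquired. A minimal, self-contained sketch of that pattern follows (simplified names and types, not LevelDB code):

// Hedged sketch of "ref under the lock, read outside the lock, unref under the lock".
#include <mutex>

struct RefCounted {
  int refs = 1;                    // only touched while holding mu below
  void Ref() { ++refs; }
  void Unref() { if (--refs == 0) delete this; }
  int Read() const { return 42; }  // stands in for MemTable::Get()
};

std::mutex mu;
RefCounted* current_table = new RefCounted;  // stands in for mem_ / imm_

int ReadWithoutHoldingLock() {
  mu.lock();
  RefCounted* local = current_table;  // capture the pointer under the lock
  local->Ref();                       // pin it so a concurrent swap cannot free it
  mu.unlock();

  int v = local->Read();              // slow work runs with the lock released

  mu.lock();
  local->Unref();                     // may delete the table if this was the last ref
  mu.unlock();
  return v;
}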
...
@@ -30,7 +30,8 @@ struct LRUHandle {
   LRUHandle* prev;
   size_t charge;      // TODO(opt): Only allow uint32_t?
   size_t key_length;
-  size_t refs;        // TODO(opt): Pack with "key_length"?
+  uint32_t refs;
+  uint32_t hash;      // Hash of key(); used for fast sharding and comparisons
   char key_data[1];   // Beginning of key
 
   Slice key() const {
...
@@ -54,12 +55,12 @@ class HandleTable {
   HandleTable() : length_(0), elems_(0), list_(NULL) { Resize(); }
   ~HandleTable() { delete[] list_; }
 
-  LRUHandle* Lookup(LRUHandle* h) {
-    return *FindPointer(h);
+  LRUHandle* Lookup(const Slice& key, uint32_t hash) {
+    return *FindPointer(key, hash);
   }
 
   LRUHandle* Insert(LRUHandle* h) {
-    LRUHandle** ptr = FindPointer(h);
+    LRUHandle** ptr = FindPointer(h->key(), h->hash);
     LRUHandle* old = *ptr;
     h->next_hash = (old == NULL ? NULL : old->next_hash);
     *ptr = h;
...
@@ -74,8 +75,8 @@ class HandleTable {
     return old;
   }
 
-  LRUHandle* Remove(LRUHandle* h) {
-    LRUHandle** ptr = FindPointer(h);
+  LRUHandle* Remove(const Slice& key, uint32_t hash) {
+    LRUHandle** ptr = FindPointer(key, hash);
     LRUHandle* result = *ptr;
     if (result != NULL) {
       *ptr = result->next_hash;
...
@@ -92,13 +93,12 @@ class HandleTable {
   LRUHandle** list_;
 
   // Return a pointer to slot that points to a cache entry that
-  // matches *h.  If there is no such cache entry, return a pointer to
-  // the trailing slot in the corresponding linked list.
-  LRUHandle** FindPointer(LRUHandle* h) {
-    Slice key = h->key();
-    uint32_t hash = Hash(key.data(), key.size(), 0);
+  // matches key/hash.  If there is no such cache entry, return a
+  // pointer to the trailing slot in the corresponding linked list.
+  LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
     LRUHandle** ptr = &list_[hash & (length_ - 1)];
-    while (*ptr != NULL && key != (*ptr)->key()) {
+    while (*ptr != NULL &&
+           ((*ptr)->hash != hash || key != (*ptr)->key())) {
       ptr = &(*ptr)->next_hash;
     }
     return ptr;
...
@@ -117,7 +117,7 @@ class HandleTable {
       while (h != NULL) {
         LRUHandle* next = h->next_hash;
         Slice key = h->key();
-        uint32_t hash = Hash(key.data(), key.size(), 0);
+        uint32_t hash = h->hash;
         LRUHandle** ptr = &new_list[hash & (new_length - 1)];
         h->next_hash = *ptr;
         *ptr = h;
...
@@ -132,26 +132,30 @@
   }
 };
 
-class LRUCache : public Cache {
+// A single shard of sharded cache.
+class LRUCache {
  public:
-  explicit LRUCache(size_t capacity);
-  virtual ~LRUCache();
+  LRUCache();
+  ~LRUCache();
 
-  virtual Handle* Insert(const Slice& key, void* value, size_t charge,
-                         void (*deleter)(const Slice& key, void* value));
-  virtual Handle* Lookup(const Slice& key);
-  virtual void Release(Handle* handle);
-  virtual void* Value(Handle* handle);
-  virtual void Erase(const Slice& key);
-  virtual uint64_t NewId();
+  // Separate from constructor so caller can easily make an array of LRUCache
+  void SetCapacity(size_t capacity) { capacity_ = capacity; }
+
+  // Like Cache methods, but with an extra "hash" parameter.
+  Cache::Handle* Insert(const Slice& key, uint32_t hash,
+                        void* value, size_t charge,
+                        void (*deleter)(const Slice& key, void* value));
+  Cache::Handle* Lookup(const Slice& key, uint32_t hash);
+  void Release(Cache::Handle* handle);
+  void Erase(const Slice& key, uint32_t hash);
 
  private:
   void LRU_Remove(LRUHandle* e);
   void LRU_Append(LRUHandle* e);
   void Unref(LRUHandle* e);
 
-  // Constructor parameters
-  const size_t capacity_;
+  // Initialized before use.
+  size_t capacity_;
 
   // mutex_ protects the following state.
   port::Mutex mutex_;
...
@@ -165,9 +169,8 @@ class LRUCache : public Cache {
   HandleTable table_;
 };
 
-LRUCache::LRUCache(size_t capacity)
-    : capacity_(capacity),
-      usage_(0),
+LRUCache::LRUCache()
+    : usage_(0),
       last_id_(0) {
   // Make empty circular linked list
   lru_.next = &lru_;
...
@@ -206,32 +209,25 @@ void LRUCache::LRU_Append(LRUHandle* e) {
   e->next->prev = e;
 }
 
-Cache::Handle* LRUCache::Lookup(const Slice& key) {
+Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
   MutexLock l(&mutex_);
-
-  LRUHandle dummy;
-  dummy.next = &dummy;
-  dummy.value = const_cast<Slice*>(&key);
-  LRUHandle* e = table_.Lookup(&dummy);
+  LRUHandle* e = table_.Lookup(key, hash);
   if (e != NULL) {
     e->refs++;
     LRU_Remove(e);
     LRU_Append(e);
   }
-  return reinterpret_cast<Handle*>(e);
+  return reinterpret_cast<Cache::Handle*>(e);
 }
 
-void* LRUCache::Value(Handle* handle) {
-  return reinterpret_cast<LRUHandle*>(handle)->value;
-}
-
-void LRUCache::Release(Handle* handle) {
+void LRUCache::Release(Cache::Handle* handle) {
   MutexLock l(&mutex_);
   Unref(reinterpret_cast<LRUHandle*>(handle));
 }
 
-Cache::Handle* LRUCache::Insert(const Slice& key, void* value, size_t charge,
-                                void (*deleter)(const Slice& key, void* value)) {
+Cache::Handle* LRUCache::Insert(
+    const Slice& key, uint32_t hash, void* value, size_t charge,
+    void (*deleter)(const Slice& key, void* value)) {
   MutexLock l(&mutex_);
 
   LRUHandle* e = reinterpret_cast<LRUHandle*>(
...
@@ -240,6 +236,7 @@ Cache::Handle* LRUCache::Insert(const Slice& key, void* value, size_t charge,
   e->deleter = deleter;
   e->charge = charge;
   e->key_length = key.size();
+  e->hash = hash;
   e->refs = 2;  // One from LRUCache, one for the returned handle
   memcpy(e->key_data, key.data(), key.size());
   LRU_Append(e);
...
@@ -254,35 +251,77 @@ Cache::Handle* LRUCache::Insert(const Slice& key, void* value, size_t charge,
   while (usage_ > capacity_ && lru_.next != &lru_) {
     LRUHandle* old = lru_.next;
     LRU_Remove(old);
-    table_.Remove(old);
+    table_.Remove(old->key(), old->hash);
     Unref(old);
   }
 
-  return reinterpret_cast<Handle*>(e);
+  return reinterpret_cast<Cache::Handle*>(e);
 }
 
-void LRUCache::Erase(const Slice& key) {
+void LRUCache::Erase(const Slice& key, uint32_t hash) {
   MutexLock l(&mutex_);
-
-  LRUHandle dummy;
-  dummy.next = &dummy;
-  dummy.value = const_cast<Slice*>(&key);
-  LRUHandle* e = table_.Remove(&dummy);
+  LRUHandle* e = table_.Remove(key, hash);
   if (e != NULL) {
     LRU_Remove(e);
     Unref(e);
   }
 }
 
-uint64_t LRUCache::NewId() {
-  MutexLock l(&mutex_);
-  return ++(last_id_);
-}
+static const int kNumShardBits = 4;
+static const int kNumShards = 1 << kNumShardBits;
+
+class ShardedLRUCache : public Cache {
+ private:
+  LRUCache shard_[kNumShards];
+  port::Mutex id_mutex_;
+  uint64_t last_id_;
+
+  static inline uint32_t HashSlice(const Slice& s) {
+    return Hash(s.data(), s.size(), 0);
+  }
+
+  static uint32_t Shard(uint32_t hash) {
+    return hash >> (32 - kNumShardBits);
+  }
+
+ public:
+  explicit ShardedLRUCache(size_t capacity) {
+    const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards;
+    for (int s = 0; s < kNumShards; s++) {
+      shard_[s].SetCapacity(per_shard);
+    }
+  }
+  virtual ~ShardedLRUCache() { }
+  virtual Handle* Insert(const Slice& key, void* value, size_t charge,
+                         void (*deleter)(const Slice& key, void* value)) {
+    const uint32_t hash = HashSlice(key);
+    return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter);
+  }
+  virtual Handle* Lookup(const Slice& key) {
+    const uint32_t hash = HashSlice(key);
+    return shard_[Shard(hash)].Lookup(key, hash);
+  }
+  virtual void Release(Handle* handle) {
+    LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
+    shard_[Shard(h->hash)].Release(handle);
+  }
+  virtual void Erase(const Slice& key) {
+    const uint32_t hash = HashSlice(key);
+    shard_[Shard(hash)].Erase(key, hash);
+  }
+  virtual void* Value(Handle* handle) {
+    return reinterpret_cast<LRUHandle*>(handle)->value;
+  }
+  virtual uint64_t NewId() {
+    MutexLock l(&id_mutex_);
+    return ++(last_id_);
+  }
+};
 
 }  // end anonymous namespace
 
 Cache* NewLRUCache(size_t capacity) {
-  return new LRUCache(capacity);
+  return new ShardedLRUCache(capacity);
 }
 
 }
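To make the 16-way sharding above concrete: the top kNumShardBits bits of the key hash select one of the 16 LRUCache shards, each with its own mutex and roughly capacity/16 of the budget, so lookups on unrelated keys usually contend on different locks. A standalone sketch of just that shard-selection math (the sample hash values are arbitrary):

// Standalone illustration; the constants mirror the diff above.
#include <cstdint>
#include <cstdio>

static const int kNumShardBits = 4;
static const int kNumShards = 1 << kNumShardBits;  // 16 shards

static uint32_t Shard(uint32_t hash) {
  return hash >> (32 - kNumShardBits);  // top 4 bits -> value in [0, 15]
}

int main() {
  const uint32_t sample_hashes[] = {0x00000000u, 0x7fffffffu, 0x80000000u, 0xffffffffu};
  for (uint32_t h : sample_hashes) {
    std::printf("hash 0x%08x -> shard %u of %d\n",
                (unsigned)h, (unsigned)Shard(h), kNumShards);
  }
  return 0;
}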
...
@@ -32,7 +32,7 @@ class CacheTest {
     current_->deleted_values_.push_back(DecodeValue(v));
   }
 
-  static const int kCacheSize = 100;
+  static const int kCacheSize = 1000;
   std::vector<int> deleted_keys_;
   std::vector<int> deleted_values_;
   Cache* cache_;
...
@@ -137,23 +137,40 @@ TEST(CacheTest, EvictionPolicy) {
   Insert(200, 201);
 
   // Frequently used entry must be kept around
-  for (int i = 0; i < kCacheSize; i++) {
+  for (int i = 0; i < kCacheSize + 100; i++) {
     Insert(1000+i, 2000+i);
     ASSERT_EQ(2000+i, Lookup(1000+i));
     ASSERT_EQ(101, Lookup(100));
   }
   ASSERT_EQ(101, Lookup(100));
-  ASSERT_EQ(2, deleted_keys_.size());
-  ASSERT_EQ(200, deleted_keys_[0]);
-  ASSERT_EQ(201, deleted_values_[0]);
+  ASSERT_EQ(-1, Lookup(200));
 }
 
-TEST(CacheTest, HeavyEntry) {
-  Insert(100, 101);
-  Insert(200, 201, kCacheSize);
-  ASSERT_EQ(1, deleted_keys_.size());
-  ASSERT_EQ(100, deleted_keys_[0]);
-  ASSERT_EQ(101, deleted_values_[0]);
+TEST(CacheTest, HeavyEntries) {
+  // Add a bunch of light and heavy entries and then count the combined
+  // size of items still in the cache, which must be approximately the
+  // same as the total capacity.
+  const int kLight = 1;
+  const int kHeavy = 10;
+  int added = 0;
+  int index = 0;
+  while (added < 2*kCacheSize) {
+    const int weight = (index & 1) ? kLight : kHeavy;
+    Insert(index, 1000+index, weight);
+    added += weight;
+    index++;
+  }
+
+  int cached_weight = 0;
+  for (int i = 0; i < index; i++) {
+    const int weight = (i & 1 ? kLight : kHeavy);
+    int r = Lookup(i);
+    if (r >= 0) {
+      cached_weight += weight;
+      ASSERT_EQ(1000+i, r);
+    }
+  }
+  ASSERT_LE(cached_weight, kCacheSize + kCacheSize/10);
 }
 
 TEST(CacheTest, NewId) {
...
...
@@ -55,6 +55,17 @@ void Histogram::Add(double value) {
   sum_squares_ += (value * value);
 }
 
+void Histogram::Merge(const Histogram& other) {
+  if (other.min_ < min_) min_ = other.min_;
+  if (other.max_ > max_) max_ = other.max_;
+  num_ += other.num_;
+  sum_ += other.sum_;
+  sum_squares_ += other.sum_squares_;
+  for (int b = 0; b < kNumBuckets; b++) {
+    buckets_[b] += other.buckets_[b];
+  }
+}
+
 double Histogram::Median() const {
   return Percentile(50.0);
 }
...
...
@@ -16,6 +16,7 @@ class Histogram {
   void Clear();
   void Add(double value);
+  void Merge(const Histogram& other);
 
   std::string ToString() const;
...
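Histogram::Merge (declared and defined above) supports the new parallel benchmarks: presumably each db_bench thread records latencies into its own Histogram and the per-thread results are folded together before reporting. A minimal sketch of that usage with a stripped-down stand-in type (only the min/max/count/sum bookkeeping, not the real bucketed Histogram):

// Hedged sketch: combine per-thread stats the same way Histogram::Merge does.
#include <algorithm>
#include <cstdio>

struct MiniHistogram {
  double min_ = 1e200, max_ = 0, sum_ = 0;
  long num_ = 0;
  void Add(double v) {
    min_ = std::min(min_, v);
    max_ = std::max(max_, v);
    sum_ += v;
    num_++;
  }
  void Merge(const MiniHistogram& other) {
    min_ = std::min(min_, other.min_);
    max_ = std::max(max_, other.max_);
    num_ += other.num_;
    sum_ += other.sum_;
  }
};

int main() {
  MiniHistogram thread0, thread1;   // one histogram per benchmark thread
  thread0.Add(1.0); thread0.Add(3.0);
  thread1.Add(2.0);
  thread0.Merge(thread1);           // fold thread 1's results into thread 0's
  std::printf("n=%ld min=%.1f max=%.1f avg=%.2f\n",
              thread0.num_, thread0.min_, thread0.max_, thread0.sum_ / thread0.num_);
  return 0;
}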