cache.cc 9.3 KB
Newer Older
J
jorlow@chromium.org 已提交
1 2 3 4 5
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include <assert.h>
6 7
#include <stdio.h>
#include <stdlib.h>
J
jorlow@chromium.org 已提交
8

9
#include "rocksdb/cache.h"
J
jorlow@chromium.org 已提交
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
#include "port/port.h"
#include "util/hash.h"
#include "util/mutexlock.h"

namespace leveldb {

Cache::~Cache() {
}

namespace {

// LRU cache implementation

// An entry is a variable length heap-allocated structure.  Entries
// are kept in a circular doubly linked list ordered by access time.
struct LRUHandle {
  void* value;
  void (*deleter)(const Slice&, void* value);
28
  LRUHandle* next_hash;
J
jorlow@chromium.org 已提交
29 30 31 32
  LRUHandle* next;
  LRUHandle* prev;
  size_t charge;      // TODO(opt): Only allow uint32_t?
  size_t key_length;
33 34
  uint32_t refs;
  uint32_t hash;      // Hash of key(); used for fast sharding and comparisons
J
jorlow@chromium.org 已提交
35 36 37 38 39 40 41 42 43 44 45 46 47
  char key_data[1];   // Beginning of key

  Slice key() const {
    // For cheaper lookups, we allow a temporary Handle object
    // to store a pointer to a key in "value".
    if (next == this) {
      return *(reinterpret_cast<Slice*>(value));
    } else {
      return Slice(key_data, key_length);
    }
  }
};

48 49 50 51 52 53 54
// We provide our own simple hash table since it removes a whole bunch
// of porting hacks and is also faster than some of the built-in hash
// table implementations in some of the compiler/runtime combinations
// we have tested.  E.g., readrandom speeds up by ~5% over the g++
// 4.4.3's builtin hashtable.
class HandleTable {
 public:
A
Abhishek Kona 已提交
55
  HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
56 57
  ~HandleTable() { delete[] list_; }

58 59
  LRUHandle* Lookup(const Slice& key, uint32_t hash) {
    return *FindPointer(key, hash);
60 61 62
  }

  LRUHandle* Insert(LRUHandle* h) {
63
    LRUHandle** ptr = FindPointer(h->key(), h->hash);
64
    LRUHandle* old = *ptr;
A
Abhishek Kona 已提交
65
    h->next_hash = (old == nullptr ? nullptr : old->next_hash);
66
    *ptr = h;
A
Abhishek Kona 已提交
67
    if (old == nullptr) {
68 69 70 71 72 73
      ++elems_;
      if (elems_ > length_) {
        // Since each cache entry is fairly large, we aim for a small
        // average linked list length (<= 1).
        Resize();
      }
J
jorlow@chromium.org 已提交
74
    }
75 76 77
    return old;
  }

78 79
  LRUHandle* Remove(const Slice& key, uint32_t hash) {
    LRUHandle** ptr = FindPointer(key, hash);
80
    LRUHandle* result = *ptr;
A
Abhishek Kona 已提交
81
    if (result != nullptr) {
82 83
      *ptr = result->next_hash;
      --elems_;
J
jorlow@chromium.org 已提交
84
    }
85 86 87 88 89 90 91 92 93 94 95
    return result;
  }

 private:
  // The table consists of an array of buckets where each bucket is
  // a linked list of cache entries that hash into the bucket.
  uint32_t length_;
  uint32_t elems_;
  LRUHandle** list_;

  // Return a pointer to slot that points to a cache entry that
96 97 98
  // matches key/hash.  If there is no such cache entry, return a
  // pointer to the trailing slot in the corresponding linked list.
  LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
99
    LRUHandle** ptr = &list_[hash & (length_ - 1)];
A
Abhishek Kona 已提交
100
    while (*ptr != nullptr &&
101
           ((*ptr)->hash != hash || key != (*ptr)->key())) {
102
      ptr = &(*ptr)->next_hash;
J
jorlow@chromium.org 已提交
103
    }
104 105
    return ptr;
  }
J
jorlow@chromium.org 已提交
106

107 108 109 110 111 112 113 114
  void Resize() {
    uint32_t new_length = 4;
    while (new_length < elems_) {
      new_length *= 2;
    }
    LRUHandle** new_list = new LRUHandle*[new_length];
    memset(new_list, 0, sizeof(new_list[0]) * new_length);
    uint32_t count = 0;
D
dgrogan@chromium.org 已提交
115
    for (uint32_t i = 0; i < length_; i++) {
116
      LRUHandle* h = list_[i];
A
Abhishek Kona 已提交
117
      while (h != nullptr) {
118
        LRUHandle* next = h->next_hash;
119
        uint32_t hash = h->hash;
120 121 122 123 124 125
        LRUHandle** ptr = &new_list[hash & (new_length - 1)];
        h->next_hash = *ptr;
        *ptr = h;
        h = next;
        count++;
      }
J
jorlow@chromium.org 已提交
126
    }
127 128 129 130 131 132
    assert(elems_ == count);
    delete[] list_;
    list_ = new_list;
    length_ = new_length;
  }
};
J
jorlow@chromium.org 已提交
133

134 135
// A single shard of sharded cache.
class LRUCache {
J
jorlow@chromium.org 已提交
136
 public:
137 138
  LRUCache();
  ~LRUCache();
J
jorlow@chromium.org 已提交
139

140 141 142 143 144 145 146 147 148 149
  // Separate from constructor so caller can easily make an array of LRUCache
  void SetCapacity(size_t capacity) { capacity_ = capacity; }

  // Like Cache methods, but with an extra "hash" parameter.
  Cache::Handle* Insert(const Slice& key, uint32_t hash,
                        void* value, size_t charge,
                        void (*deleter)(const Slice& key, void* value));
  Cache::Handle* Lookup(const Slice& key, uint32_t hash);
  void Release(Cache::Handle* handle);
  void Erase(const Slice& key, uint32_t hash);
J
jorlow@chromium.org 已提交
150 151 152 153 154 155

 private:
  void LRU_Remove(LRUHandle* e);
  void LRU_Append(LRUHandle* e);
  void Unref(LRUHandle* e);

156 157
  // Initialized before use.
  size_t capacity_;
J
jorlow@chromium.org 已提交
158 159 160 161 162 163 164 165 166 167 168 169

  // mutex_ protects the following state.
  port::Mutex mutex_;
  size_t usage_;

  // Dummy head of LRU list.
  // lru.prev is newest entry, lru.next is oldest entry.
  LRUHandle lru_;

  HandleTable table_;
};

170
LRUCache::LRUCache()
171
    : usage_(0) {
J
jorlow@chromium.org 已提交
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
  // Make empty circular linked list
  lru_.next = &lru_;
  lru_.prev = &lru_;
}

LRUCache::~LRUCache() {
  for (LRUHandle* e = lru_.next; e != &lru_; ) {
    LRUHandle* next = e->next;
    assert(e->refs == 1);  // Error if caller has an unreleased handle
    Unref(e);
    e = next;
  }
}

void LRUCache::Unref(LRUHandle* e) {
  assert(e->refs > 0);
  e->refs--;
  if (e->refs <= 0) {
190

J
jorlow@chromium.org 已提交
191 192 193 194 195 196 197 198
    (*e->deleter)(e->key(), e->value);
    free(e);
  }
}

void LRUCache::LRU_Remove(LRUHandle* e) {
  e->next->prev = e->prev;
  e->prev->next = e->next;
199
  usage_ -= e->charge;
J
jorlow@chromium.org 已提交
200 201 202 203 204 205 206 207
}

void LRUCache::LRU_Append(LRUHandle* e) {
  // Make "e" newest entry by inserting just before lru_
  e->next = &lru_;
  e->prev = lru_.prev;
  e->prev->next = e;
  e->next->prev = e;
208
  usage_ += e->charge;
J
jorlow@chromium.org 已提交
209 210
}

211
Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
J
jorlow@chromium.org 已提交
212
  MutexLock l(&mutex_);
213
  LRUHandle* e = table_.Lookup(key, hash);
A
Abhishek Kona 已提交
214
  if (e != nullptr) {
J
jorlow@chromium.org 已提交
215 216 217 218
    e->refs++;
    LRU_Remove(e);
    LRU_Append(e);
  }
219
  return reinterpret_cast<Cache::Handle*>(e);
J
jorlow@chromium.org 已提交
220 221
}

222
void LRUCache::Release(Cache::Handle* handle) {
J
jorlow@chromium.org 已提交
223 224 225 226
  MutexLock l(&mutex_);
  Unref(reinterpret_cast<LRUHandle*>(handle));
}

227 228 229
Cache::Handle* LRUCache::Insert(
    const Slice& key, uint32_t hash, void* value, size_t charge,
    void (*deleter)(const Slice& key, void* value)) {
J
jorlow@chromium.org 已提交
230 231 232 233 234 235 236 237
  MutexLock l(&mutex_);

  LRUHandle* e = reinterpret_cast<LRUHandle*>(
      malloc(sizeof(LRUHandle)-1 + key.size()));
  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
238
  e->hash = hash;
J
jorlow@chromium.org 已提交
239 240 241 242
  e->refs = 2;  // One from LRUCache, one for the returned handle
  memcpy(e->key_data, key.data(), key.size());
  LRU_Append(e);

243
  LRUHandle* old = table_.Insert(e);
A
Abhishek Kona 已提交
244
  if (old != nullptr) {
J
jorlow@chromium.org 已提交
245 246 247 248 249 250 251
    LRU_Remove(old);
    Unref(old);
  }

  while (usage_ > capacity_ && lru_.next != &lru_) {
    LRUHandle* old = lru_.next;
    LRU_Remove(old);
252
    table_.Remove(old->key(), old->hash);
J
jorlow@chromium.org 已提交
253 254 255
    Unref(old);
  }

256
  return reinterpret_cast<Cache::Handle*>(e);
J
jorlow@chromium.org 已提交
257 258
}

259
void LRUCache::Erase(const Slice& key, uint32_t hash) {
J
jorlow@chromium.org 已提交
260
  MutexLock l(&mutex_);
261
  LRUHandle* e = table_.Remove(key, hash);
A
Abhishek Kona 已提交
262
  if (e != nullptr) {
J
jorlow@chromium.org 已提交
263 264 265 266 267
    LRU_Remove(e);
    Unref(e);
  }
}

268
static int kNumShardBits = 4;         // default values, can be overridden
269 270 271

class ShardedLRUCache : public Cache {
 private:
272
  LRUCache* shard_;
273 274
  port::Mutex id_mutex_;
  uint64_t last_id_;
275
  int numShardBits;
276
  size_t capacity_;
277 278 279 280 281

  static inline uint32_t HashSlice(const Slice& s) {
    return Hash(s.data(), s.size(), 0);
  }

282
  uint32_t Shard(uint32_t hash) {
283 284
    // Note, hash >> 32 yields hash in gcc, not the zero we expect!
    return (numShardBits > 0) ? (hash >> (32 - numShardBits)) : 0;
285 286
  }

287 288
  void init(size_t capacity, int numbits) {
    numShardBits = numbits;
289
    capacity_ = capacity;
290 291 292 293
    int numShards = 1 << numShardBits;
    shard_ = new LRUCache[numShards];
    const size_t per_shard = (capacity + (numShards - 1)) / numShards;
    for (int s = 0; s < numShards; s++) {
294 295 296
      shard_[s].SetCapacity(per_shard);
    }
  }
297 298 299 300 301 302 303 304 305 306

 public:
  explicit ShardedLRUCache(size_t capacity)
      : last_id_(0) {
    init(capacity, kNumShardBits);
  }
  ShardedLRUCache(size_t capacity, int numShardBits)
     : last_id_(0) {
    init(capacity, numShardBits);
  }
307 308 309
  virtual ~ShardedLRUCache() {
    delete[] shard_;
  }
310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333
  virtual Handle* Insert(const Slice& key, void* value, size_t charge,
                         void (*deleter)(const Slice& key, void* value)) {
    const uint32_t hash = HashSlice(key);
    return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter);
  }
  virtual Handle* Lookup(const Slice& key) {
    const uint32_t hash = HashSlice(key);
    return shard_[Shard(hash)].Lookup(key, hash);
  }
  virtual void Release(Handle* handle) {
    LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
    shard_[Shard(h->hash)].Release(handle);
  }
  virtual void Erase(const Slice& key) {
    const uint32_t hash = HashSlice(key);
    shard_[Shard(hash)].Erase(key, hash);
  }
  virtual void* Value(Handle* handle) {
    return reinterpret_cast<LRUHandle*>(handle)->value;
  }
  virtual uint64_t NewId() {
    MutexLock l(&id_mutex_);
    return ++(last_id_);
  }
334
  virtual size_t GetCapacity() {
335 336
    return capacity_;
  }
337
};
J
jorlow@chromium.org 已提交
338 339 340

}  // end anonymous namespace

341 342
shared_ptr<Cache> NewLRUCache(size_t capacity) {
  return std::make_shared<ShardedLRUCache>(capacity);
J
jorlow@chromium.org 已提交
343 344
}

345
shared_ptr<Cache> NewLRUCache(size_t capacity, int numShardBits) {
346
  if (numShardBits >= 20) {
A
Abhishek Kona 已提交
347
    return nullptr;  // the cache cannot be sharded into too many fine pieces
348
  }
349
  return std::make_shared<ShardedLRUCache>(capacity, numShardBits);
350 351
}

H
Hans Wennborg 已提交
352
}  // namespace leveldb