memtable.cc 6.3 KB
Newer Older
J
jorlow@chromium.org 已提交
1 2 3 4 5 6
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "db/memtable.h"
#include "db/dbformat.h"
7 8 9
#include "leveldb/comparator.h"
#include "leveldb/env.h"
#include "leveldb/iterator.h"
10
#include "leveldb/merge_operator.h"
J
jorlow@chromium.org 已提交
11 12 13 14 15 16 17 18 19 20 21
#include "util/coding.h"

namespace leveldb {

// Reads the varint32 length prefix stored at "data" and returns a Slice
// covering the "length" bytes that immediately follow the prefix.
static Slice GetLengthPrefixedSlice(const char* data) {
  uint32_t length;
  // The decode limit is data + 5: the input is assumed to be uncorrupted,
  // so no tighter bound is tracked here.
  const char* const payload = GetVarint32Ptr(data, data + 5, &length);
  return Slice(payload, length);
}

22
// Builds an empty memtable ordered by "cmp".  "numlevel" is forwarded to
// edit_.  The table allocates out of arena_, the reference count starts at
// zero, and all flush-tracking state starts cleared.
MemTable::MemTable(const InternalKeyComparator& cmp, int numlevel)
    : comparator_(cmp)
    , refs_(0)
    , table_(comparator_, &arena_)
    , flush_in_progress_(false)
    , flush_completed_(false)
    , file_number_(0)
    , edit_(numlevel)
    , first_seqno_(0) {}

// A memtable must only be destroyed once nothing references it any more.
MemTable::~MemTable() {
  assert(refs_ == 0);
}

// Reports the memory usage of the arena that backs this memtable.
size_t MemTable::ApproximateMemoryUsage() {
  const size_t arena_bytes = arena_.MemoryUsage();
  return arena_bytes;
}

// Orders two table entries.  Each pointer addresses a length-prefixed
// internal key; the prefixes are stripped and the wrapped comparator decides.
int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr)
    const {
  return comparator.Compare(GetLengthPrefixedSlice(aptr),
                            GetLengthPrefixedSlice(bptr));
}

// Builds the length-prefixed form of "target" (varint32 size followed by the
// raw bytes) inside *scratch, discarding whatever *scratch held before.
// The returned pointer aliases scratch's buffer, so it is only usable while
// *scratch is left unmodified.
static const char* EncodeKey(std::string* scratch, const Slice& target) {
  const size_t target_len = target.size();
  scratch->clear();
  PutVarint32(scratch, target_len);
  scratch->append(target.data(), target_len);
  return scratch->data();
}

class MemTableIterator: public Iterator {
 public:
59 60 61 62 63 64 65 66 67
  explicit MemTableIterator(MemTable::Table* table) : iter_(table) { }

  virtual bool Valid() const { return iter_.Valid(); }
  virtual void Seek(const Slice& k) { iter_.Seek(EncodeKey(&tmp_, k)); }
  virtual void SeekToFirst() { iter_.SeekToFirst(); }
  virtual void SeekToLast() { iter_.SeekToLast(); }
  virtual void Next() { iter_.Next(); }
  virtual void Prev() { iter_.Prev(); }
  virtual Slice key() const { return GetLengthPrefixedSlice(iter_.key()); }
J
jorlow@chromium.org 已提交
68
  virtual Slice value() const {
69
    Slice key_slice = GetLengthPrefixedSlice(iter_.key());
J
jorlow@chromium.org 已提交
70 71 72 73 74 75
    return GetLengthPrefixedSlice(key_slice.data() + key_slice.size());
  }

  virtual Status status() const { return Status::OK(); }

 private:
76
  MemTable::Table::Iterator iter_;
J
jorlow@chromium.org 已提交
77 78 79 80 81 82 83 84
  std::string tmp_;       // For passing to EncodeKey

  // No copying allowed
  MemTableIterator(const MemTableIterator&);
  void operator=(const MemTableIterator&);
};

// Returns a newly heap-allocated iterator over this memtable's table.
// The result comes from a plain "new", so releasing it is up to the caller.
Iterator* MemTable::NewIterator() {
  Iterator* const table_iter = new MemTableIterator(&table_);
  return table_iter;
}

// Appends an entry for "key" -> "value" with sequence number "s" and entry
// type "type".  The entry is serialized into arena memory and then inserted
// into the table.
void MemTable::Add(SequenceNumber s, ValueType type,
                   const Slice& key,
                   const Slice& value) {
  // Format of an entry is concatenation of:
  //  key_size     : varint32 of internal_key.size()
  //  key bytes    : char[internal_key.size()]
  //  value_size   : varint32 of value.size()
  //  value bytes  : char[value.size()]
  const size_t key_size = key.size();
  const size_t val_size = value.size();
  // The internal key is the user key followed by an 8-byte tag.
  const size_t internal_key_size = key_size + 8;

  // Both lengths are written as varint32 and are read back into a uint32_t
  // by the decoders in this file, so anything wider cannot round-trip.
  assert(internal_key_size == static_cast<uint32_t>(internal_key_size));
  assert(val_size == static_cast<uint32_t>(val_size));

  const size_t encoded_len =
      VarintLength(internal_key_size) + internal_key_size +
      VarintLength(val_size) + val_size;
  char* const buf = arena_.Allocate(encoded_len);
  char* p = EncodeVarint32(buf, internal_key_size);
  memcpy(p, key.data(), key_size);
  p += key_size;
  // Tag: sequence number in the upper 56 bits, entry type in the low byte.
  EncodeFixed64(p, (s << 8) | type);
  p += 8;
  p = EncodeVarint32(p, val_size);
  memcpy(p, value.data(), val_size);
  // Compare at full size_t width instead of truncating encoded_len.
  assert(static_cast<size_t>((p + val_size) - buf) == encoded_len);
  table_.Insert(buf);

  // The first sequence number inserted into the memtable
  assert(first_seqno_ == 0 || s > first_seqno_);
  if (first_seqno_ == 0) {
    first_seqno_ = s;
  }
}

120 121
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
                  const Options& options) {
122 123 124
  Slice memkey = key.memtable_key();
  Table::Iterator iter(&table_);
  iter.Seek(memkey.data());
125 126 127 128 129 130 131 132 133 134 135 136

  bool merge_in_progress = false;
  std::string operand;
  if (s->IsMergeInProgress()) {
    swap(*value, operand);
    merge_in_progress = true;
  }


  auto merge_operator = options.merge_operator;
  auto logger = options.info_log;
  for (; iter.Valid(); iter.Next()) {
137 138
    // entry format is:
    //    klength  varint32
139
    //    userkey  char[klength-8]
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
    //    tag      uint64
    //    vlength  varint32
    //    value    char[vlength]
    // Check that it belongs to same user key.  We do not check the
    // sequence number since the Seek() call above should have skipped
    // all entries with overly large sequence numbers.
    const char* entry = iter.key();
    uint32_t key_length;
    const char* key_ptr = GetVarint32Ptr(entry, entry+5, &key_length);
    if (comparator_.comparator.user_comparator()->Compare(
            Slice(key_ptr, key_length - 8),
            key.user_key()) == 0) {
      // Correct user key
      const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8);
      switch (static_cast<ValueType>(tag & 0xff)) {
        case kTypeValue: {
          Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
157 158 159 160 161 162
          if (merge_in_progress) {
            merge_operator->Merge(key.user_key(), &v, operand,
                                   value, logger.get());
          } else {
            value->assign(v.data(), v.size());
          }
163 164
          return true;
        }
165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
        case kTypeMerge: {
          Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
          if (merge_in_progress) {
            merge_operator->Merge(key.user_key(), &v, operand,
                                  value, logger.get());
            swap(*value, operand);
          } else {
            assert(merge_operator);
            merge_in_progress = true;
            operand.assign(v.data(), v.size());
          }
          break;
        }
        case kTypeDeletion: {
          if (merge_in_progress) {
            merge_operator->Merge(key.user_key(), nullptr, operand,
                                   value, logger.get());
          } else {
            *s = Status::NotFound(Slice());
          }
185
          return true;
186
        }
187
      }
188 189 190
    } else {
      // exit loop if user key does not match
      break;
191 192
    }
  }
193 194 195 196 197

  if (merge_in_progress) {
    swap(*value, operand);
    *s = Status::MergeInProgress("");
  }
198 199 200
  return false;
}

H
Hans Wennborg 已提交
201
}  // namespace leveldb