write_batch.cc 9.0 KB
Newer Older
1 2 3 4 5
//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
//  This source code is licensed under the BSD-style license found in the
//  LICENSE file in the root directory of this source tree. An additional grant
//  of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// WriteBatch::rep_ :=
//    sequence: fixed64
//    count: fixed32
//    data: record[count]
// record :=
//    kTypeValue varstring varstring
//    kTypeMerge varstring varstring
//    kTypeDeletion varstring
//    kTypeLogData varstring      (not included in count)
// varstring :=
//    len: varint32
//    data: uint8[len]

22 23
#include "rocksdb/write_batch.h"
#include "rocksdb/options.h"
24
#include "rocksdb/merge_operator.h"
J
jorlow@chromium.org 已提交
25
#include "db/dbformat.h"
26
#include "db/db_impl.h"
J
jorlow@chromium.org 已提交
27
#include "db/memtable.h"
28
#include "db/snapshot.h"
J
jorlow@chromium.org 已提交
29 30
#include "db/write_batch_internal.h"
#include "util/coding.h"
31
#include "util/statistics_imp.h"
32
#include <stdexcept>
J
jorlow@chromium.org 已提交
33

34
namespace rocksdb {
J
jorlow@chromium.org 已提交
35

36 37 38
// Size in bytes of the fixed WriteBatch header: an 8-byte sequence number
// followed by a 4-byte record count.
static const size_t kHeader = 8 /* sequence */ + 4 /* count */;

39 40
// Builds an empty batch. The backing string reserves at least kHeader bytes
// (more when reserved_bytes asks for it) and Clear() then sizes it to the
// header.
WriteBatch::WriteBatch(size_t reserved_bytes) {
  size_t capacity = kHeader;
  if (reserved_bytes > kHeader) {
    capacity = reserved_bytes;
  }
  rep_.reserve(capacity);
  Clear();
}

// No explicit cleanup is performed here.
WriteBatch::~WriteBatch() = default;

46 47
// Out-of-line destructor for the handler interface; no explicit cleanup.
WriteBatch::Handler::~Handler() = default;

48 49 50 51
// Default Merge callback. Handlers that do not override it cannot process
// merge records: this implementation always throws std::runtime_error.
void WriteBatch::Handler::Merge(const Slice& /*key*/,
                                const Slice& /*value*/) {
  throw std::runtime_error("Handler::Merge not implemented!");
}

J
Jim Paton 已提交
52 53 54 55 56
// Default LogData callback: a handler that does not override this simply
// ignores blobs, so the body is intentionally empty.
void WriteBatch::Handler::LogData(const Slice& /*blob*/) {
}

57 58 59 60
// Default iteration policy: always returns true, so WriteBatch::Iterate() is
// never asked to stop early. Override to halt a replay.
bool WriteBatch::Handler::Continue() {
  const bool keep_iterating = true;
  return keep_iterating;
}

J
jorlow@chromium.org 已提交
61 62
void WriteBatch::Clear() {
  rep_.clear();
63
  rep_.resize(kHeader);
J
jorlow@chromium.org 已提交
64 65
}

H
Haobo Xu 已提交
66 67 68 69
// Returns the record count stored in this batch's header, as decoded by
// WriteBatchInternal::Count().
int WriteBatch::Count() const {
  const int num_records = WriteBatchInternal::Count(this);
  return num_records;
}

70 71
// Replays the records stored in this batch, in order, through `handler`.
//
// The kHeader-byte header is skipped, then each record's tag byte selects the
// callback: kTypeValue -> Put, kTypeDeletion -> Delete, kTypeMerge -> Merge,
// kTypeLogData -> LogData. handler->Continue() is consulted before every
// record; a false return halts the replay.
//
// Returns:
//   - Corruption if rep_ is shorter than the header, a record cannot be
//     decoded, a tag is unknown, or a complete replay saw a number of
//     Put/Delete/Merge records that differs from the header count
//     (LogData records are not counted);
//   - OK otherwise, including when the handler halted the replay early.
Status WriteBatch::Iterate(Handler* handler) const {
  Slice input(rep_);
  if (input.size() < kHeader) {
    return Status::Corruption("malformed WriteBatch (too small)");
  }
  input.remove_prefix(kHeader);

  Slice key, value, blob;
  int found = 0;
  // Set when the handler asked us to stop before the input was exhausted.
  bool halted_by_handler = false;
  while (!input.empty()) {
    if (!handler->Continue()) {
      halted_by_handler = true;
      break;
    }
    char tag = input[0];
    input.remove_prefix(1);
    switch (tag) {
      case kTypeValue:
        if (GetLengthPrefixedSlice(&input, &key) &&
            GetLengthPrefixedSlice(&input, &value)) {
          handler->Put(key, value);
          found++;
        } else {
          return Status::Corruption("bad WriteBatch Put");
        }
        break;
      case kTypeDeletion:
        if (GetLengthPrefixedSlice(&input, &key)) {
          handler->Delete(key);
          found++;
        } else {
          return Status::Corruption("bad WriteBatch Delete");
        }
        break;
      case kTypeMerge:
        if (GetLengthPrefixedSlice(&input, &key) &&
            GetLengthPrefixedSlice(&input, &value)) {
          handler->Merge(key, value);
          found++;
        } else {
          return Status::Corruption("bad WriteBatch Merge");
        }
        break;
      case kTypeLogData:
        // Blobs are handed to the handler but do not count as records.
        if (GetLengthPrefixedSlice(&input, &blob)) {
          handler->LogData(blob);
        } else {
          return Status::Corruption("bad WriteBatch Blob");
        }
        break;
      default:
        return Status::Corruption("unknown WriteBatch tag");
    }
  }

  // A deliberately truncated replay has not seen every record, so comparing
  // `found` with the header count would misreport a valid batch as corrupt.
  if (halted_by_handler) {
    return Status::OK();
  }
  if (found != WriteBatchInternal::Count(this)) {
    return Status::Corruption("WriteBatch has wrong count");
  }
  return Status::OK();
}

J
jorlow@chromium.org 已提交
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
// Decodes the fixed32 record count stored at byte offset 8 of the batch
// representation, i.e. right after the 8-byte sequence number.
int WriteBatchInternal::Count(const WriteBatch* b) {
  const char* count_field = b->rep_.data() + 8;
  return DecodeFixed32(count_field);
}

// Overwrites the fixed32 record count at byte offset 8 of the batch
// representation with n.
void WriteBatchInternal::SetCount(WriteBatch* b, int n) {
  char* count_field = &b->rep_[8];
  EncodeFixed32(count_field, n);
}

// Decodes the fixed64 sequence number stored in the first 8 bytes of the
// batch representation.
SequenceNumber WriteBatchInternal::Sequence(const WriteBatch* b) {
  const char* sequence_field = b->rep_.data();
  return static_cast<SequenceNumber>(DecodeFixed64(sequence_field));
}

// Overwrites the fixed64 sequence number in the first 8 bytes of the batch
// representation with seq.
void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) {
  char* sequence_field = &b->rep_[0];
  EncodeFixed64(sequence_field, seq);
}

// Appends a Put record: increments the header count, then writes the
// kTypeValue tag byte followed by the length-prefixed key and value.
void WriteBatch::Put(const Slice& key, const Slice& value) {
  const int updated_count = WriteBatchInternal::Count(this) + 1;
  WriteBatchInternal::SetCount(this, updated_count);

  const char tag = static_cast<char>(kTypeValue);
  rep_.push_back(tag);
  PutLengthPrefixedSlice(&rep_, key);
  PutLengthPrefixedSlice(&rep_, value);
}

150 151 152 153 154 155 156
// Appends a Put record whose key and value are each supplied as SliceParts:
// increments the header count, then writes the kTypeValue tag byte followed
// by the length-prefixed key parts and value parts.
void WriteBatch::Put(const SliceParts& key, const SliceParts& value) {
  const int updated_count = WriteBatchInternal::Count(this) + 1;
  WriteBatchInternal::SetCount(this, updated_count);

  const char tag = static_cast<char>(kTypeValue);
  rep_.push_back(tag);
  PutLengthPrefixedSliceParts(&rep_, key);
  PutLengthPrefixedSliceParts(&rep_, value);
}

J
jorlow@chromium.org 已提交
157 158 159 160 161 162
// Appends a Delete record: increments the header count, then writes the
// kTypeDeletion tag byte followed by the length-prefixed key.
void WriteBatch::Delete(const Slice& key) {
  const int updated_count = WriteBatchInternal::Count(this) + 1;
  WriteBatchInternal::SetCount(this, updated_count);

  const char tag = static_cast<char>(kTypeDeletion);
  rep_.push_back(tag);
  PutLengthPrefixedSlice(&rep_, key);
}

163 164 165 166 167 168 169
// Appends a Merge record: increments the header count, then writes the
// kTypeMerge tag byte followed by the length-prefixed key and operand value.
void WriteBatch::Merge(const Slice& key, const Slice& value) {
  const int updated_count = WriteBatchInternal::Count(this) + 1;
  WriteBatchInternal::SetCount(this, updated_count);

  const char tag = static_cast<char>(kTypeMerge);
  rep_.push_back(tag);
  PutLengthPrefixedSlice(&rep_, key);
  PutLengthPrefixedSlice(&rep_, value);
}

J
Jim Paton 已提交
170 171 172 173
// Appends a log-data record: the kTypeLogData tag byte followed by the
// length-prefixed blob. The header count is left untouched.
void WriteBatch::PutLogData(const Slice& blob) {
  const char tag = static_cast<char>(kTypeLogData);
  rep_.push_back(tag);
  PutLengthPrefixedSlice(&rep_, blob);
}
174

175 176 177 178 179
namespace {
class MemTableInserter : public WriteBatch::Handler {
 public:
  SequenceNumber sequence_;
  MemTable* mem_;
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
  const Options* options_;
  DBImpl* db_;
  const bool filter_deletes_;

  MemTableInserter(SequenceNumber sequence, MemTable* mem, const Options* opts,
                   DB* db, const bool filter_deletes)
    : sequence_(sequence),
      mem_(mem),
      options_(opts),
      db_(reinterpret_cast<DBImpl*>(db)),
      filter_deletes_(filter_deletes) {
    assert(mem_);
    if (filter_deletes_) {
      assert(options_);
      assert(db_);
    }
  }
197 198

  virtual void Put(const Slice& key, const Slice& value) {
199 200
    if (options_->inplace_update_support
        && mem_->Update(sequence_, kTypeValue, key, value)) {
201
      RecordTick(options_->statistics.get(), NUMBER_KEYS_UPDATED);
202 203 204
    } else {
      mem_->Add(sequence_, kTypeValue, key, value);
    }
205
    sequence_++;
J
jorlow@chromium.org 已提交
206
  }
207
  virtual void Merge(const Slice& key, const Slice& value) {
208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
    bool perform_merge = false;

    if (options_->max_successive_merges > 0 && db_ != nullptr) {
      LookupKey lkey(key, sequence_);

      // Count the number of successive merges at the head
      // of the key in the memtable
      size_t num_merges = mem_->CountSuccessiveMergeEntries(lkey);

      if (num_merges >= options_->max_successive_merges) {
        perform_merge = true;
      }
    }

    if (perform_merge) {
      // 1) Get the existing value
      std::string get_value;

      // Pass in the sequence number so that we also include previous merge
      // operations in the same batch.
      SnapshotImpl read_from_snapshot;
      read_from_snapshot.number_ = sequence_;
      ReadOptions read_options;
      read_options.snapshot = &read_from_snapshot;

      db_->Get(read_options, key, &get_value);
      Slice get_value_slice = Slice(get_value);

      // 2) Apply this merge
      auto merge_operator = options_->merge_operator.get();
      assert(merge_operator);

      std::deque<std::string> operands;
      operands.push_front(value.ToString());
      std::string new_value;
      if (!merge_operator->FullMerge(key,
                                     &get_value_slice,
                                     operands,
                                     &new_value,
                                     options_->info_log.get())) {
          // Failed to merge!
          RecordTick(options_->statistics.get(), NUMBER_MERGE_FAILURES);

          // Store the delta in memtable
          perform_merge = false;
      } else {
        // 3) Add value to memtable
        mem_->Add(sequence_, kTypeValue, key, new_value);
      }
    }

    if (!perform_merge) {
      // Add merge operator to memtable
      mem_->Add(sequence_, kTypeMerge, key, value);
    }

264 265
    sequence_++;
  }
266
  virtual void Delete(const Slice& key) {
267 268 269 270 271 272 273
    if (filter_deletes_) {
      SnapshotImpl read_from_snapshot;
      read_from_snapshot.number_ = sequence_;
      ReadOptions ropts;
      ropts.snapshot = &read_from_snapshot;
      std::string value;
      if (!db_->KeyMayExist(ropts, key, &value)) {
274
        RecordTick(options_->statistics.get(), NUMBER_FILTERED_DELETES);
275 276
        return;
      }
277
    }
278 279
    mem_->Add(sequence_, kTypeDeletion, key, Slice());
    sequence_++;
J
jorlow@chromium.org 已提交
280
  }
281
};
H
Hans Wennborg 已提交
282
}  // namespace
283

284 285 286 287 288
// Replays batch `b` into memtable `mem` through a MemTableInserter that
// starts at the batch's stored sequence number. Returns the status produced
// by WriteBatch::Iterate().
Status WriteBatchInternal::InsertInto(const WriteBatch* b, MemTable* mem,
                                      const Options* opts, DB* db,
                                      const bool filter_deletes) {
  const SequenceNumber first_sequence = WriteBatchInternal::Sequence(b);
  MemTableInserter inserter(first_sequence, mem, opts, db, filter_deletes);
  return b->Iterate(&inserter);
}

// Replaces the whole representation of `b` with a copy of `contents`, which
// must be at least kHeader bytes long (checked by assert only).
void WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) {
  const size_t num_bytes = contents.size();
  assert(num_bytes >= kHeader);
  b->rep_.assign(contents.data(), num_bytes);
}

297 298 299 300 301 302
// Appends every record of `src` to `dst`: dst's header count becomes the sum
// of both counts and src's bytes after its header are copied onto the end of
// dst. dst's sequence number is left as it was.
void WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src) {
  assert(src->rep_.size() >= kHeader);

  const int combined_count = Count(dst) + Count(src);
  SetCount(dst, combined_count);

  const char* src_records = src->rep_.data() + kHeader;
  const size_t src_records_size = src->rep_.size() - kHeader;
  dst->rep_.append(src_records, src_records_size);
}

303
}  // namespace rocksdb