compaction.cc 11.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
//  This source code is licensed under the BSD-style license found in the
//  LICENSE file in the root directory of this source tree. An additional grant
//  of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "db/compaction.h"
I
Igor Canadi 已提交
11

L
liuhuahang 已提交
12
#ifndef __STDC_FORMAT_MACROS
I
Igor Canadi 已提交
13
#define __STDC_FORMAT_MACROS
L
liuhuahang 已提交
14 15
#endif

I
Igor Canadi 已提交
16 17 18
#include <inttypes.h>
#include <vector>

I
Igor Canadi 已提交
19
#include "db/column_family.h"
I
Igor Canadi 已提交
20
#include "util/logging.h"
21 22 23

namespace rocksdb {

M
miguelportilla 已提交
24
// Adds up fd.GetFileSize() over the leading run of non-null entries in
// `files`. Accumulation stops at the first null pointer (or at the end of
// the vector), so entries after a null are not counted.
uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
  uint64_t total_bytes = 0;
  for (FileMetaData* meta : files) {
    if (meta == nullptr) {
      break;
    }
    total_bytes += meta->fd.GetFileSize();
  }
  return total_bytes;
}

I
Igor Canadi 已提交
32 33
// Binds this compaction to `_input_version`.
//
// Records the version and the column family it reports via cfd(), takes a
// reference on each, and allocates the VersionEdit for this compaction,
// tagged with the column family's ID. `_input_version` must be non-null; it
// is dereferenced unconditionally.
void Compaction::SetInputVersion(Version* _input_version) {
  input_version_ = _input_version;
  cfd_ = _input_version->cfd();

  // Take a reference on the column family first, then on the version.
  cfd_->Ref();
  input_version_->Ref();

  edit_ = new VersionEdit();
  edit_->SetColumnFamily(cfd_->GetID());
}

// Constructs a compaction spanning levels [start_level, out_level] with no
// input files selected yet.
//
// The input version, column family and edit start out null; they are filled
// in by SetInputVersion(). One (empty) input slot is created for every level
// in the covered range, each labelled with its level number.
Compaction::Compaction(int number_levels, int start_level, int out_level,
                       uint64_t target_file_size,
                       uint64_t max_grandparent_overlap_bytes,
                       uint32_t output_path_id,
                       CompressionType output_compression, bool seek_compaction,
                       bool deletion_compaction)
    : start_level_(start_level),
      output_level_(out_level),
      max_output_file_size_(target_file_size),
      max_grandparent_overlap_bytes_(max_grandparent_overlap_bytes),
      input_version_(nullptr),
      edit_(nullptr),
      number_levels_(number_levels),
      cfd_(nullptr),
      output_path_id_(output_path_id),
      output_compression_(output_compression),
      seek_compaction_(seek_compaction),
      deletion_compaction_(deletion_compaction),
      grandparent_index_(0),
      seen_key_(false),
      overlapped_bytes_(0),
      base_index_(-1),
      parent_index_(-1),
      score_(0),
      bottommost_level_(false),
      is_full_compaction_(false),
      is_manual_compaction_(false),
      // The sized std::vector constructor value-initializes its elements, so
      // every per-level cursor already starts at 0.
      level_ptrs_(std::vector<size_t>(number_levels_)) {
  const int covered_levels = output_level_ - start_level_ + 1;
  input_levels_.resize(covered_levels);
  inputs_.resize(covered_levels);
  for (int offset = 0; offset < covered_levels; ++offset) {
    inputs_[offset].level = start_level_ + offset;
  }
}

81
// Constructs a compaction from an explicit set of input files.
//
// `_inputs` is copied as-is. The output file size limit and the output
// compression come from `_options`; the number of levels comes from
// `vstorage`, which must be non-null. The input version, column family and
// edit start out null and are filled in by SetInputVersion().
//
// edit_ and output_path_id_ are initialized explicitly here, matching the
// other constructor. The destructor unconditionally does `delete edit_`, so
// leaving edit_ indeterminate would be undefined behaviour whenever
// SetInputVersion() is never called. output_path_id_ defaults to 0.
Compaction::Compaction(VersionStorageInfo* vstorage,
    const autovector<CompactionInputFiles>& _inputs,
    int _start_level, int _output_level,
    uint64_t _max_grandparent_overlap_bytes,
    const CompactionOptions& _options,
    bool _deletion_compaction)
    : start_level_(_start_level),
      output_level_(_output_level),
      max_output_file_size_(_options.output_file_size_limit),
      max_grandparent_overlap_bytes_(_max_grandparent_overlap_bytes),
      input_version_(nullptr),
      edit_(nullptr),
      number_levels_(vstorage->num_levels()),
      cfd_(nullptr),
      output_path_id_(0),
      output_compression_(_options.compression),
      seek_compaction_(false),
      deletion_compaction_(_deletion_compaction),
      inputs_(_inputs),
      grandparent_index_(0),
      seen_key_(false),
      overlapped_bytes_(0),
      base_index_(-1),
      parent_index_(-1),
      score_(0),
      bottommost_level_(false),
      is_full_compaction_(false),
      is_manual_compaction_(false),
      // The sized std::vector constructor value-initializes its elements, so
      // every per-level cursor already starts at 0.
      level_ptrs_(std::vector<size_t>(number_levels_)) {
}

113 114 115 116 117
// Frees the VersionEdit and drops whatever references this compaction still
// holds on its input version and column family.
Compaction::~Compaction() {
  delete edit_;

  if (input_version_ != nullptr) {
    input_version_->Unref();
  }

  // The column family object is deleted here when Unref() returns true.
  if (cfd_ != nullptr && cfd_->Unref()) {
    delete cfd_;
  }
}

F
Feng Zhu 已提交
125
void Compaction::GenerateFileLevels() {
126
  input_levels_.resize(num_input_levels());
127 128 129
  for (size_t which = 0; which < num_input_levels(); which++) {
    DoGenerateLevelFilesBrief(&input_levels_[which], inputs_[which].files,
                              &arena_);
F
Feng Zhu 已提交
130 131 132
  }
}

133 134 135 136
bool Compaction::IsTrivialMove() const {
  // Avoid a move if there is lots of overlapping grandparent data.
  // Otherwise, the move could create a parent file that will require
  // a very expensive merge later on.
137 138 139 140
  // If start_level_== output_level_, the purpose is to force compaction
  // filter to be applied to that level, and thus cannot be a trivia move.
  return (start_level_ != output_level_ &&
          num_input_levels() == 2 &&
141 142
          num_input_files(0) == 1 &&
          num_input_files(1) == 0 &&
I
Igor Canadi 已提交
143
          TotalFileSize(grandparents_) <= max_grandparent_overlap_bytes_);
144 145
}

I
Igor Canadi 已提交
146
// Records in `out_edit` a DeleteFile() entry for every input file of this
// compaction, keyed by the file's level and file number.
void Compaction::AddInputDeletions(VersionEdit* out_edit) {
  for (size_t which = 0; which < num_input_levels(); ++which) {
    auto& level_inputs = inputs_[which];
    for (size_t file_idx = 0; file_idx < level_inputs.size(); ++file_idx) {
      out_edit->DeleteFile(level(which),
                           level_inputs[file_idx]->fd.GetNumber());
    }
  }
}

154
bool Compaction::KeyNotExistsBeyondOutputLevel(const Slice& user_key) {
S
sdong 已提交
155
  assert(input_version_ != nullptr);
156 157
  assert(cfd_->ioptions()->compaction_style != kCompactionStyleFIFO);
  if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) {
158 159 160
    return bottommost_level_;
  }
  // Maybe use binary search to find right entry instead of linear search?
I
Igor Canadi 已提交
161
  const Comparator* user_cmp = cfd_->user_comparator();
162
  for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) {
S
sdong 已提交
163
    const std::vector<FileMetaData*>& files =
S
sdong 已提交
164
        input_version_->storage_info()->LevelFiles(lvl);
165 166 167 168 169
    for (; level_ptrs_[lvl] < files.size(); ) {
      FileMetaData* f = files[level_ptrs_[lvl]];
      if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) {
        // We've advanced far enough
        if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) {
170 171
          // Key falls in this file's range, so definitely
          // exists beyond output level
172 173 174 175 176 177 178 179 180 181 182 183
          return false;
        }
        break;
      }
      level_ptrs_[lvl]++;
    }
  }
  return true;
}

bool Compaction::ShouldStopBefore(const Slice& internal_key) {
  // Scan to find earliest grandparent file that contains key.
I
Igor Canadi 已提交
184
  const InternalKeyComparator* icmp = &cfd_->internal_comparator();
185 186 187 188
  while (grandparent_index_ < grandparents_.size() &&
      icmp->Compare(internal_key,
                    grandparents_[grandparent_index_]->largest.Encode()) > 0) {
    if (seen_key_) {
189
      overlapped_bytes_ += grandparents_[grandparent_index_]->fd.GetFileSize();
190 191 192 193 194 195 196 197 198
    }
    assert(grandparent_index_ + 1 >= grandparents_.size() ||
           icmp->Compare(grandparents_[grandparent_index_]->largest.Encode(),
                         grandparents_[grandparent_index_+1]->smallest.Encode())
                         < 0);
    grandparent_index_++;
  }
  seen_key_ = true;

I
Igor Canadi 已提交
199
  if (overlapped_bytes_ > max_grandparent_overlap_bytes_) {
200 201 202 203 204 205 206 207 208
    // Too much overlap for current output; start new output
    overlapped_bytes_ = 0;
    return true;
  } else {
    return false;
  }
}

// Mark (or clear) each file that is being compacted
209
void Compaction::MarkFilesBeingCompacted(bool mark_as_compacted) {
210
  for (size_t i = 0; i < num_input_levels(); i++) {
211
    for (unsigned int j = 0; j < inputs_[i].size(); j++) {
212 213 214
      assert(mark_as_compacted ? !inputs_[i][j]->being_compacted :
                                  inputs_[i][j]->being_compacted);
      inputs_[i][j]->being_compacted = mark_as_compacted;
215 216 217 218 219
    }
  }
}

// Is this compaction producing files at the bottommost level?
S
sdong 已提交
220 221 222
void Compaction::SetupBottomMostLevel(VersionStorageInfo* vstorage,
                                      bool is_manual, bool level0_only) {
  if (level0_only) {
223 224 225 226 227 228
    // If universal compaction style is used and manual
    // compaction is occuring, then we are guaranteed that
    // all files will be picked in a single compaction
    // run. We can safely set bottommost_level_ = true.
    // If it is not manual compaction, then bottommost_level_
    // is already set when the Compaction was created.
229
    if (is_manual) {
230 231 232 233 234
      bottommost_level_ = true;
    }
    return;
  }
  bottommost_level_ = true;
235 236
  // checks whether there are files living beyond the output_level.
  for (int i = output_level_ + 1; i < number_levels_; i++) {
S
sdong 已提交
237
    if (vstorage->NumLevelFiles(i) > 0) {
238 239 240 241 242 243 244 245 246 247 248
      bottommost_level_ = false;
      break;
    }
  }
}

void Compaction::ReleaseInputs() {
  if (input_version_ != nullptr) {
    input_version_->Unref();
    input_version_ = nullptr;
  }
249 250 251 252 253 254
  if (cfd_ != nullptr) {
    if (cfd_->Unref()) {
      delete cfd_;
    }
    cfd_ = nullptr;
  }
255 256
}

I
Igor Canadi 已提交
257 258 259 260
// Delegates to the ReleaseCompactionFiles() of this column family's
// compaction picker, passing this compaction and `status` through.
// cfd_ must be non-null: it is dereferenced unconditionally.
void Compaction::ReleaseCompactionFiles(Status status) {
  cfd_->compaction_picker()->ReleaseCompactionFiles(this, status);
}

261
void Compaction::ResetNextCompactionIndex() {
S
sdong 已提交
262
  assert(input_version_ != nullptr);
S
sdong 已提交
263
  input_version_->storage_info()->ResetNextCompactionIndex(start_level_);
264 265
}

I
Igor Canadi 已提交
266 267 268
namespace {
// Formats `files` into `output` as a sequence of
// "<file number>(<human-readable size>) " entries.
//
// `output` has room for `len` bytes including the terminating NUL.
// Formatting stops at the first entry that does not fit completely.
//
// Returns the offset at which the caller should continue writing: the
// position of the trailing space after the last complete entry (so that the
// next write overwrites it), or 0 if no complete entry was written. The
// result is never negative; previously a non-empty `files` whose first entry
// did not fit yielded -1, which moved the caller's write offset backwards.
int InputSummary(const std::vector<FileMetaData*>& files, char* output,
                 int len) {
  // Nothing can be written (not even the terminator) into an empty buffer.
  if (output == nullptr || len <= 0) {
    return 0;
  }
  *output = '\0';

  const int kSizeTextLen = 16;
  int write = 0;
  for (FileMetaData* file : files) {
    const int sz = len - write;
    char sztxt[kSizeTextLen];
    AppendHumanBytes(file->fd.GetFileSize(), sztxt, kSizeTextLen);
    const int ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ",
                             file->fd.GetNumber(), sztxt);
    // A negative return is an encoding error; ret >= sz means truncation.
    if (ret < 0 || ret >= sz) break;
    write += ret;
  }
  // If at least one entry was written, step back onto its trailing space so
  // the caller overwrites it.
  return write > 0 ? write - 1 : 0;
}
}  // namespace
285 286

void Compaction::Summary(char* output, int len) {
I
Igor Canadi 已提交
287 288 289
  int write =
      snprintf(output, len, "Base version %" PRIu64
                            " Base level %d, seek compaction:%d, inputs: [",
290 291
               input_version_->GetVersionNumber(),
               start_level_, seek_compaction_);
292
  if (write < 0 || write >= len) {
293 294 295
    return;
  }

296
  for (size_t level_iter = 0; level_iter < num_input_levels(); ++level_iter) {
I
Igor Canadi 已提交
297
    if (level_iter > 0) {
298 299 300 301 302
      write += snprintf(output + write, len - write, "], [");
      if (write < 0 || write >= len) {
        return;
      }
    }
I
Igor Canadi 已提交
303 304
    write +=
        InputSummary(inputs_[level_iter].files, output + write, len - write);
305 306 307
    if (write < 0 || write >= len) {
      return;
    }
308 309
  }

I
Igor Canadi 已提交
310
  snprintf(output + write, len - write, "]");
311 312
}

313 314
uint64_t Compaction::OutputFilePreallocationSize(
    const MutableCFOptions& mutable_options) {
315 316
  uint64_t preallocation_size = 0;

317
  if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel) {
318
    preallocation_size = mutable_options.MaxFileSizeForLevel(output_level());
319
  } else {
320
    for (size_t level_iter = 0; level_iter < num_input_levels(); ++level_iter) {
I
Igor Canadi 已提交
321
      for (const auto& f : inputs_[level_iter].files) {
322 323
        preallocation_size += f->fd.GetFileSize();
      }
324 325 326 327 328 329 330
    }
  }
  // Over-estimate slightly so we don't end up just barely crossing
  // the threshold
  return preallocation_size * 1.1;
}

I
Igor Canadi 已提交
331 332 333 334 335 336 337 338 339 340 341
// Factory wrapper around the level-range constructor (presumably meant for
// tests, judging by the TEST_ prefix). Forwards every argument unchanged and
// returns the raw pointer produced by `new`.
Compaction* Compaction::TEST_NewCompaction(
    int num_levels, int start_level, int out_level, uint64_t target_file_size,
    uint64_t max_grandparent_overlap_bytes, uint32_t output_path_id,
    CompressionType output_compression, bool seek_compaction,
    bool deletion_compaction) {
  return new Compaction(num_levels, start_level, out_level, target_file_size,
                        max_grandparent_overlap_bytes, output_path_id,
                        output_compression, seek_compaction,
                        deletion_compaction);
}

342
}  // namespace rocksdb