//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
//  This source code is licensed under the BSD-style license found in the
//  LICENSE file in the root directory of this source tree. An additional grant
//  of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "db/compaction.h"
I
Igor Canadi 已提交
11 12 13 14 15

#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include <vector>

I
Igor Canadi 已提交
16
#include "db/column_family.h"
I
Igor Canadi 已提交
17
#include "util/logging.h"
18 19 20 21 22 23

namespace rocksdb {

// Sums the on-disk size of the files in `files`.
//
// Accumulation stops at the first null entry: files that appear after a null
// pointer in the vector are not counted.
static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
  uint64_t total_bytes = 0;
  for (FileMetaData* meta : files) {
    if (meta == nullptr) {
      break;
    }
    total_bytes += meta->fd.GetFileSize();
  }
  return total_bytes;
}

// Constructs a compaction from `level` into `out_level` based on
// `input_version`.
//
// The constructor takes a reference on both `input_version` and the column
// family it exposes through `cfd_`; those references are dropped again by
// ReleaseInputs() or by the destructor. It also allocates the VersionEdit
// (`edit_`) that is tagged with the column family's id.
Compaction::Compaction(Version* input_version, int level, int out_level,
                       uint64_t target_file_size,
                       uint64_t max_grandparent_overlap_bytes,
                       CompressionType output_compression, bool seek_compaction,
                       bool deletion_compaction)
    : level_(level),
      out_level_(out_level),
      max_output_file_size_(target_file_size),
      max_grandparent_overlap_bytes_(max_grandparent_overlap_bytes),
      input_version_(input_version),
      number_levels_(input_version_->NumberLevels()),
      cfd_(input_version_->cfd_),
      output_compression_(output_compression),
      seek_compaction_(seek_compaction),
      deletion_compaction_(deletion_compaction),
      grandparent_index_(0),
      seen_key_(false),
      overlapped_bytes_(0),
      base_index_(-1),
      parent_index_(-1),
      score_(0),
      bottommost_level_(false),
      is_full_compaction_(false),
      is_manual_compaction_(false),
      level_ptrs_(std::vector<size_t>(number_levels_)) {
  // Pin the column family and the input version for as long as this
  // compaction holds on to them.
  cfd_->Ref();
  input_version_->Ref();

  edit_ = new VersionEdit();
  edit_->SetColumnFamily(cfd_->GetID());

  // Start every per-level file cursor at the first file of its level.
  for (auto& level_cursor : level_ptrs_) {
    level_cursor = 0;
  }
}

// Destroys the VersionEdit and drops any references that are still held on
// the input version and the column family (both are already null if
// ReleaseInputs() ran earlier).
Compaction::~Compaction() {
  delete edit_;

  if (input_version_ != nullptr) {
    input_version_->Unref();
  }

  // The column family is deleted here only when Unref() returns true.
  if (cfd_ != nullptr && cfd_->Unref()) {
    delete cfd_;
  }
}

bool Compaction::IsTrivialMove() const {
  // Avoid a move if there is lots of overlapping grandparent data.
  // Otherwise, the move could create a parent file that will require
  // a very expensive merge later on.
  // If level_== out_level_, the purpose is to force compaction filter to be
  // applied to that level, and thus cannot be a trivia move.
  return (level_ != out_level_ &&
          num_input_files(0) == 1 &&
          num_input_files(1) == 0 &&
I
Igor Canadi 已提交
85
          TotalFileSize(grandparents_) <= max_grandparent_overlap_bytes_);
86 87
}

I
Igor Canadi 已提交
88 89
// Reports the `deletion_compaction` flag this compaction was constructed with.
bool Compaction::IsDeletionCompaction() const {
  return deletion_compaction_;
}

90 91 92
// Records in `edit` the deletion of every input file of this compaction.
// Files in inputs_[0] are deleted from level_, files in inputs_[1] from
// level_ + 1.
void Compaction::AddInputDeletions(VersionEdit* edit) {
  for (int which = 0; which < 2; which++) {
    const int input_level = level_ + which;
    for (const auto& file : inputs_[which]) {
      edit->DeleteFile(input_level, file->fd.GetNumber());
    }
  }
}

bool Compaction::IsBaseLevelForKey(const Slice& user_key) {
I
Igor Canadi 已提交
99
  assert(cfd_->options()->compaction_style != kCompactionStyleFIFO);
I
Igor Canadi 已提交
100
  if (cfd_->options()->compaction_style == kCompactionStyleUniversal) {
101 102 103
    return bottommost_level_;
  }
  // Maybe use binary search to find right entry instead of linear search?
I
Igor Canadi 已提交
104
  const Comparator* user_cmp = cfd_->user_comparator();
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
  for (int lvl = level_ + 2; lvl < number_levels_; lvl++) {
    const std::vector<FileMetaData*>& files = input_version_->files_[lvl];
    for (; level_ptrs_[lvl] < files.size(); ) {
      FileMetaData* f = files[level_ptrs_[lvl]];
      if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) {
        // We've advanced far enough
        if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) {
          // Key falls in this file's range, so definitely not base level
          return false;
        }
        break;
      }
      level_ptrs_[lvl]++;
    }
  }
  return true;
}

bool Compaction::ShouldStopBefore(const Slice& internal_key) {
  // Scan to find earliest grandparent file that contains key.
I
Igor Canadi 已提交
125
  const InternalKeyComparator* icmp = &cfd_->internal_comparator();
126 127 128 129
  while (grandparent_index_ < grandparents_.size() &&
      icmp->Compare(internal_key,
                    grandparents_[grandparent_index_]->largest.Encode()) > 0) {
    if (seen_key_) {
130
      overlapped_bytes_ += grandparents_[grandparent_index_]->fd.GetFileSize();
131 132 133 134 135 136 137 138 139
    }
    assert(grandparent_index_ + 1 >= grandparents_.size() ||
           icmp->Compare(grandparents_[grandparent_index_]->largest.Encode(),
                         grandparents_[grandparent_index_+1]->smallest.Encode())
                         < 0);
    grandparent_index_++;
  }
  seen_key_ = true;

I
Igor Canadi 已提交
140
  if (overlapped_bytes_ > max_grandparent_overlap_bytes_) {
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
    // Too much overlap for current output; start new output
    overlapped_bytes_ = 0;
    return true;
  } else {
    return false;
  }
}

// Mark (or clear) each file that is being compacted.
//
// Sets `being_compacted` to `value` on every file in inputs_[0] and
// inputs_[1]. In debug builds, asserts that each flag actually changes, i.e.
// a file is never marked twice nor cleared twice.
void Compaction::MarkFilesBeingCompacted(bool value) {
  for (int which = 0; which < 2; which++) {
    // Iterate the input vector in place; no copy of it is needed.
    for (const auto& file : inputs_[which]) {
      assert(value ? !file->being_compacted : file->being_compacted);
      file->being_compacted = value;
    }
  }
}

// Is this compaction producing files at the bottommost level?
void Compaction::SetupBottomMostLevel(bool isManual) {
I
Igor Canadi 已提交
163
  assert(cfd_->options()->compaction_style != kCompactionStyleFIFO);
I
Igor Canadi 已提交
164
  if (cfd_->options()->compaction_style == kCompactionStyleUniversal) {
165 166 167 168 169 170 171 172 173 174 175 176
    // If universal compaction style is used and manual
    // compaction is occuring, then we are guaranteed that
    // all files will be picked in a single compaction
    // run. We can safely set bottommost_level_ = true.
    // If it is not manual compaction, then bottommost_level_
    // is already set when the Compaction was created.
    if (isManual) {
      bottommost_level_ = true;
    }
    return;
  }
  bottommost_level_ = true;
I
Igor Canadi 已提交
177
  for (int i = output_level() + 1; i < number_levels_; i++) {
178 179 180 181 182 183 184 185 186 187 188 189
    if (input_version_->NumLevelFiles(i) > 0) {
      bottommost_level_ = false;
      break;
    }
  }
}

void Compaction::ReleaseInputs() {
  if (input_version_ != nullptr) {
    input_version_->Unref();
    input_version_ = nullptr;
  }
190 191 192 193 194 195
  if (cfd_ != nullptr) {
    if (cfd_->Unref()) {
      delete cfd_;
    }
    cfd_ = nullptr;
  }
196 197
}

I
Igor Canadi 已提交
198 199 200 201
// Forwards to the column family's compaction picker, handing it this
// compaction together with `status`.
// Dereferences cfd_ unconditionally, so it must run before ReleaseInputs()
// has nulled that pointer.
void Compaction::ReleaseCompactionFiles(Status status) {
  auto* picker = cfd_->compaction_picker();
  picker->ReleaseCompactionFiles(this, status);
}

202 203 204 205
void Compaction::ResetNextCompactionIndex() {
  input_version_->ResetNextCompactionIndex(level_);
}

I
Igor Canadi 已提交
206 207 208
namespace {
// Formats `files` into `output` as a sequence of
// "<file number>(<human-readable size>) " entries.
//
// `output` is a buffer of `len` bytes. Formatting stops at the first entry
// that does not fit completely (or on an snprintf error).
//
// Returns the number of characters that make up the complete entries, minus
// the trailing space of the last one, so a caller that keeps writing at
// `output + <return value>` overwrites that space. Returns 0 when nothing
// was written; the result is never negative.
int InputSummary(const std::vector<FileMetaData*>& files, char* output,
                 int len) {
  if (output == nullptr || len <= 0) {
    // No room even for the terminating NUL.
    return 0;
  }
  *output = '\0';

  int write = 0;
  for (const auto& file : files) {
    const int remaining = len - write;
    char sztxt[16];
    AppendHumanBytes(file->fd.GetFileSize(), sztxt,
                     static_cast<int>(sizeof(sztxt)));
    const int ret = snprintf(output + write, remaining, "%" PRIu64 "(%s) ",
                             file->fd.GetNumber(), sztxt);
    if (ret < 0 || ret >= remaining) {
      // Encoding error or truncated entry: keep only the complete entries.
      break;
    }
    write += ret;
  }

  // Only trim the trailing space if at least one entry was actually written;
  // otherwise the caller would be sent one byte backwards.
  return write > 0 ? write - 1 : 0;
}
}  // namespace
225 226

void Compaction::Summary(char* output, int len) {
I
Igor Canadi 已提交
227 228 229 230
  int write =
      snprintf(output, len, "Base version %" PRIu64
                            " Base level %d, seek compaction:%d, inputs: [",
               input_version_->GetVersionNumber(), level_, seek_compaction_);
231 232 233 234
  if (write < 0 || write >= len) {
    return;
  }

I
Igor Canadi 已提交
235
  write += InputSummary(inputs_[0], output + write, len - write);
236 237 238 239
  if (write < 0 || write >= len) {
    return;
  }

I
Igor Canadi 已提交
240
  write += snprintf(output + write, len - write, "], [");
241
  if (write < 0 || write >= len) {
242 243 244
    return;
  }

I
Igor Canadi 已提交
245
  write += InputSummary(inputs_[1], output + write, len - write);
246 247
  if (write < 0 || write >= len) {
    return;
248 249
  }

I
Igor Canadi 已提交
250
  snprintf(output + write, len - write, "]");
251 252
}

253 254 255 256 257 258 259 260
// Estimates how many bytes to preallocate for an output file.
//
// With level-style compaction the base figure is the compaction picker's
// maximum file size for the output level; with any other style it is the
// combined size of the files in inputs_[0]. The base figure is then scaled
// by 1.1 and truncated back to an integer.
uint64_t Compaction::OutputFilePreallocationSize() {
  uint64_t base_bytes = 0;

  const bool level_style =
      cfd_->options()->compaction_style == kCompactionStyleLevel;
  if (level_style) {
    base_bytes = cfd_->compaction_picker()->MaxFileSizeForLevel(output_level());
  } else {
    for (const auto& file : inputs_[0]) {
      base_bytes += file->fd.GetFileSize();
    }
  }

  // Over-estimate slightly so we don't end up just barely crossing
  // the threshold
  return static_cast<uint64_t>(base_bytes * 1.1);
}

269
}  // namespace rocksdb