db_test.cc 430.2 KB
Newer Older
1 2 3 4 5
//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
//  This source code is licensed under the BSD-style license found in the
//  LICENSE file in the root directory of this source tree. An additional grant
//  of patent rights can be found in the PATENTS file in the same directory.
//
J
jorlow@chromium.org 已提交
6 7 8 9
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

10
#include <algorithm>
11
#include <iostream>
12
#include <set>
I
Igor Canadi 已提交
13
#include <unistd.h>
14
#include <thread>
15
#include <unordered_set>
S
Stanislau Hlebik 已提交
16
#include <utility>
17

18
#include "db/filename.h"
19
#include "db/dbformat.h"
J
jorlow@chromium.org 已提交
20 21
#include "db/db_impl.h"
#include "db/filename.h"
I
Igor Canadi 已提交
22
#include "db/job_context.h"
J
jorlow@chromium.org 已提交
23 24
#include "db/version_set.h"
#include "db/write_batch_internal.h"
I
Igor Canadi 已提交
25
#include "port/stack_trace.h"
26 27
#include "rocksdb/cache.h"
#include "rocksdb/compaction_filter.h"
K
Kai Liu 已提交
28
#include "rocksdb/db.h"
29
#include "rocksdb/env.h"
30
#include "rocksdb/experimental.h"
K
Kai Liu 已提交
31
#include "rocksdb/filter_policy.h"
32
#include "rocksdb/perf_context.h"
33 34
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
S
Siying Dong 已提交
35
#include "rocksdb/table.h"
L
Lei Jin 已提交
36
#include "rocksdb/options.h"
37
#include "rocksdb/table_properties.h"
Y
Yueh-Hsuan Chiang 已提交
38
#include "rocksdb/thread_status.h"
39
#include "rocksdb/utilities/write_batch_with_index.h"
40
#include "rocksdb/utilities/checkpoint.h"
41
#include "rocksdb/utilities/convenience.h"
A
agiardullo 已提交
42
#include "rocksdb/utilities/optimistic_transaction_db.h"
K
kailiu 已提交
43
#include "table/block_based_table_factory.h"
44
#include "table/mock_table.h"
45
#include "table/plain_table_factory.h"
S
Sanjay Ghemawat 已提交
46
#include "util/hash.h"
K
Kai Liu 已提交
47
#include "util/hash_linklist_rep.h"
48
#include "utilities/merge_operators.h"
J
jorlow@chromium.org 已提交
49
#include "util/logging.h"
I
Igor Canadi 已提交
50
#include "util/compression.h"
51
#include "util/mutexlock.h"
L
Lei Jin 已提交
52
#include "util/rate_limiter.h"
K
Kai Liu 已提交
53
#include "util/statistics.h"
J
jorlow@chromium.org 已提交
54
#include "util/testharness.h"
55
#include "util/scoped_arena_iterator.h"
56
#include "util/sync_point.h"
J
jorlow@chromium.org 已提交
57
#include "util/testutil.h"
58
#include "util/mock_env.h"
59
#include "util/string_util.h"
60
#include "util/thread_status_util.h"
61
#include "util/xfunc.h"
J
jorlow@chromium.org 已提交
62

63
namespace rocksdb {
J
jorlow@chromium.org 已提交
64

I
Igor Canadi 已提交
65
static std::string RandomString(Random* rnd, int len) {
J
jorlow@chromium.org 已提交
66 67 68 69 70
  std::string r;
  test::RandomString(rnd, len, &r);
  return r;
}

71
namespace anon {
S
Sanjay Ghemawat 已提交
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
class AtomicCounter {
 private:
  port::Mutex mu_;
  int count_;
 public:
  AtomicCounter() : count_(0) { }
  void Increment() {
    MutexLock l(&mu_);
    count_++;
  }
  int Read() {
    MutexLock l(&mu_);
    return count_;
  }
  void Reset() {
    MutexLock l(&mu_);
    count_ = 0;
  }
};
91 92 93

struct OptionsOverride {
  std::shared_ptr<const FilterPolicy> filter_policy = nullptr;
94 95 96

  // Used as a bit mask of individual enums in which to skip an XF test point
  int skip_policy = 0;
97 98
};

99
}  // namespace anon
100

101 102 103 104
static std::string Key(int i) {
  char buf[100];
  snprintf(buf, sizeof(buf), "key%06d", i);
  return std::string(buf);
S
Sanjay Ghemawat 已提交
105 106
}

107 108 109
// Special Env used to delay background operations
class SpecialEnv : public EnvWrapper {
 public:
110
  Random rnd_;
S
sdong 已提交
111
  port::Mutex rnd_mutex_;  // Lock to pretect rnd_
112

A
Abhishek Kona 已提交
113
  // sstable Sync() calls are blocked while this pointer is non-nullptr.
I
Igor Canadi 已提交
114
  std::atomic<bool> delay_sstable_sync_;
115

116
  // Drop writes on the floor while this pointer is non-nullptr.
I
Igor Canadi 已提交
117
  std::atomic<bool> drop_writes_;
118

A
Abhishek Kona 已提交
119
  // Simulate no-space errors while this pointer is non-nullptr.
I
Igor Canadi 已提交
120
  std::atomic<bool> no_space_;
121

A
Abhishek Kona 已提交
122
  // Simulate non-writable file system while this pointer is non-nullptr
I
Igor Canadi 已提交
123
  std::atomic<bool> non_writable_;
124

A
Abhishek Kona 已提交
125
  // Force sync of manifest files to fail while this pointer is non-nullptr
I
Igor Canadi 已提交
126
  std::atomic<bool> manifest_sync_error_;
127

A
Abhishek Kona 已提交
128
  // Force write to manifest files to fail while this pointer is non-nullptr
I
Igor Canadi 已提交
129
  std::atomic<bool> manifest_write_error_;
130

I
Igor Canadi 已提交
131
  // Force write to log files to fail while this pointer is non-nullptr
I
Igor Canadi 已提交
132
  std::atomic<bool> log_write_error_;
I
Igor Canadi 已提交
133

134 135 136
  // Slow down every log write, in micro-seconds.
  std::atomic<int> log_write_slowdown_;

S
Sanjay Ghemawat 已提交
137
  bool count_random_reads_;
138
  anon::AtomicCounter random_read_counter_;
S
Sanjay Ghemawat 已提交
139

I
Igor Canadi 已提交
140 141 142
  bool count_sequential_reads_;
  anon::AtomicCounter sequential_read_counter_;

143
  anon::AtomicCounter sleep_counter_;
144

L
Lei Jin 已提交
145 146
  std::atomic<int64_t> bytes_written_;

147 148
  std::atomic<int> sync_counter_;

149 150 151 152
  std::atomic<uint32_t> non_writeable_rate_;

  std::atomic<uint32_t> new_writable_count_;

153
  std::atomic<uint32_t> non_writable_count_;
154

I
Igor Canadi 已提交
155 156
  std::function<void()>* table_write_callback_;

157 158 159
  int64_t addon_time_;

  explicit SpecialEnv(Env* base) : EnvWrapper(base), rnd_(301), addon_time_(0) {
I
Igor Canadi 已提交
160 161 162 163
    delay_sstable_sync_.store(false, std::memory_order_release);
    drop_writes_.store(false, std::memory_order_release);
    no_space_.store(false, std::memory_order_release);
    non_writable_.store(false, std::memory_order_release);
S
Sanjay Ghemawat 已提交
164
    count_random_reads_ = false;
I
Igor Canadi 已提交
165
    count_sequential_reads_ = false;
I
Igor Canadi 已提交
166 167 168
    manifest_sync_error_.store(false, std::memory_order_release);
    manifest_write_error_.store(false, std::memory_order_release);
    log_write_error_.store(false, std::memory_order_release);
169
    log_write_slowdown_ = 0;
L
Lei Jin 已提交
170
    bytes_written_ = 0;
171
    sync_counter_ = 0;
172 173
    non_writeable_rate_ = 0;
    new_writable_count_ = 0;
174
    non_writable_count_ = 0;
I
Igor Canadi 已提交
175
    table_write_callback_ = nullptr;
L
Lei Jin 已提交
176
  }
177

178
  Status NewWritableFile(const std::string& f, unique_ptr<WritableFile>* r,
I
Igor Sugak 已提交
179
                         const EnvOptions& soptions) override {
180 181 182
    class SSTableFile : public WritableFile {
     private:
      SpecialEnv* env_;
183
      unique_ptr<WritableFile> base_;
184 185

     public:
186
      SSTableFile(SpecialEnv* env, unique_ptr<WritableFile>&& base)
187
          : env_(env),
188
            base_(std::move(base)) {
189
      }
I
Igor Sugak 已提交
190
      Status Append(const Slice& data) override {
I
Igor Canadi 已提交
191 192 193
        if (env_->table_write_callback_) {
          (*env_->table_write_callback_)();
        }
I
Igor Canadi 已提交
194
        if (env_->drop_writes_.load(std::memory_order_acquire)) {
195 196
          // Drop writes on the floor
          return Status::OK();
I
Igor Canadi 已提交
197
        } else if (env_->no_space_.load(std::memory_order_acquire)) {
198
          return Status::IOError("No space left on device");
199
        } else {
L
Lei Jin 已提交
200
          env_->bytes_written_ += data.size();
201 202 203
          return base_->Append(data);
        }
      }
204 205 206 207 208 209 210 211
      Status Close() override {
        // Check preallocation size
        // preallocation size is never passed to base file.
        size_t preallocation_size = preallocation_block_size();
        TEST_SYNC_POINT_CALLBACK("DBTestWritableFile.GetPreallocationStatus",
                                 &preallocation_size);
        return base_->Close();
      }
I
Igor Sugak 已提交
212 213
      Status Flush() override { return base_->Flush(); }
      Status Sync() override {
214
        ++env_->sync_counter_;
I
Igor Canadi 已提交
215
        while (env_->delay_sstable_sync_.load(std::memory_order_acquire)) {
216 217 218 219
          env_->SleepForMicroseconds(100000);
        }
        return base_->Sync();
      }
I
Igor Sugak 已提交
220
      void SetIOPriority(Env::IOPriority pri) override {
L
Lei Jin 已提交
221 222
        base_->SetIOPriority(pri);
      }
223
    };
224 225 226
    class ManifestFile : public WritableFile {
     private:
      SpecialEnv* env_;
227
      unique_ptr<WritableFile> base_;
228
     public:
229 230
      ManifestFile(SpecialEnv* env, unique_ptr<WritableFile>&& b)
          : env_(env), base_(std::move(b)) { }
I
Igor Sugak 已提交
231
      Status Append(const Slice& data) override {
I
Igor Canadi 已提交
232
        if (env_->manifest_write_error_.load(std::memory_order_acquire)) {
233 234 235 236 237
          return Status::IOError("simulated writer error");
        } else {
          return base_->Append(data);
        }
      }
I
Igor Sugak 已提交
238 239 240
      Status Close() override { return base_->Close(); }
      Status Flush() override { return base_->Flush(); }
      Status Sync() override {
241
        ++env_->sync_counter_;
I
Igor Canadi 已提交
242
        if (env_->manifest_sync_error_.load(std::memory_order_acquire)) {
243 244 245 246 247
          return Status::IOError("simulated sync error");
        } else {
          return base_->Sync();
        }
      }
I
Igor Sugak 已提交
248
      uint64_t GetFileSize() override { return base_->GetFileSize(); }
249
    };
I
Igor Canadi 已提交
250
    class WalFile : public WritableFile {
I
Igor Canadi 已提交
251 252 253 254
     private:
      SpecialEnv* env_;
      unique_ptr<WritableFile> base_;
     public:
I
Igor Canadi 已提交
255 256
      WalFile(SpecialEnv* env, unique_ptr<WritableFile>&& b)
          : env_(env), base_(std::move(b)) {}
I
Igor Sugak 已提交
257
      Status Append(const Slice& data) override {
I
Igor Canadi 已提交
258
        if (env_->log_write_error_.load(std::memory_order_acquire)) {
I
Igor Canadi 已提交
259 260
          return Status::IOError("simulated writer error");
        } else {
261 262 263 264 265
          int slowdown =
              env_->log_write_slowdown_.load(std::memory_order_acquire);
          if (slowdown > 0) {
            env_->SleepForMicroseconds(slowdown);
          }
I
Igor Canadi 已提交
266 267 268
          return base_->Append(data);
        }
      }
I
Igor Sugak 已提交
269 270 271
      Status Close() override { return base_->Close(); }
      Status Flush() override { return base_->Flush(); }
      Status Sync() override {
272 273 274
        ++env_->sync_counter_;
        return base_->Sync();
      }
I
Igor Canadi 已提交
275
    };
276

277
    if (non_writeable_rate_.load(std::memory_order_acquire) > 0) {
S
sdong 已提交
278 279 280 281 282
      uint32_t random_number;
      {
        MutexLock l(&rnd_mutex_);
        random_number = rnd_.Uniform(100);
      }
283 284 285 286 287 288 289
      if (random_number < non_writeable_rate_.load()) {
        return Status::IOError("simulated random write error");
      }
    }

    new_writable_count_++;

290 291 292
    if (non_writable_count_.load() > 0) {
      non_writable_count_--;
      return Status::IOError("simulated write error");
293
    }
294

295
    Status s = target()->NewWritableFile(f, r, soptions);
296
    if (s.ok()) {
A
Abhishek Kona 已提交
297
      if (strstr(f.c_str(), ".sst") != nullptr) {
298
        r->reset(new SSTableFile(this, std::move(*r)));
A
Abhishek Kona 已提交
299
      } else if (strstr(f.c_str(), "MANIFEST") != nullptr) {
300
        r->reset(new ManifestFile(this, std::move(*r)));
I
Igor Canadi 已提交
301
      } else if (strstr(f.c_str(), "log") != nullptr) {
I
Igor Canadi 已提交
302
        r->reset(new WalFile(this, std::move(*r)));
303 304 305 306
      }
    }
    return s;
  }
S
Sanjay Ghemawat 已提交
307

308
  Status NewRandomAccessFile(const std::string& f,
309
                             unique_ptr<RandomAccessFile>* r,
I
Igor Sugak 已提交
310
                             const EnvOptions& soptions) override {
S
Sanjay Ghemawat 已提交
311 312
    class CountingFile : public RandomAccessFile {
     private:
313
      unique_ptr<RandomAccessFile> target_;
314
      anon::AtomicCounter* counter_;
S
Sanjay Ghemawat 已提交
315
     public:
316 317 318
      CountingFile(unique_ptr<RandomAccessFile>&& target,
                   anon::AtomicCounter* counter)
          : target_(std::move(target)), counter_(counter) {
S
Sanjay Ghemawat 已提交
319 320
      }
      virtual Status Read(uint64_t offset, size_t n, Slice* result,
I
Igor Sugak 已提交
321
                          char* scratch) const override {
S
Sanjay Ghemawat 已提交
322 323 324 325 326
        counter_->Increment();
        return target_->Read(offset, n, result, scratch);
      }
    };

327
    Status s = target()->NewRandomAccessFile(f, r, soptions);
S
Sanjay Ghemawat 已提交
328
    if (s.ok() && count_random_reads_) {
329
      r->reset(new CountingFile(std::move(*r), &random_read_counter_));
S
Sanjay Ghemawat 已提交
330 331 332
    }
    return s;
  }
333

I
Igor Canadi 已提交
334
  Status NewSequentialFile(const std::string& f, unique_ptr<SequentialFile>* r,
I
Igor Sugak 已提交
335
                           const EnvOptions& soptions) override {
I
Igor Canadi 已提交
336 337 338 339 340 341 342 343 344
    class CountingFile : public SequentialFile {
     private:
      unique_ptr<SequentialFile> target_;
      anon::AtomicCounter* counter_;

     public:
      CountingFile(unique_ptr<SequentialFile>&& target,
                   anon::AtomicCounter* counter)
          : target_(std::move(target)), counter_(counter) {}
I
Igor Sugak 已提交
345
      virtual Status Read(size_t n, Slice* result, char* scratch) override {
I
Igor Canadi 已提交
346 347 348
        counter_->Increment();
        return target_->Read(n, result, scratch);
      }
I
Igor Sugak 已提交
349
      virtual Status Skip(uint64_t n) override { return target_->Skip(n); }
I
Igor Canadi 已提交
350 351 352 353 354 355 356 357 358
    };

    Status s = target()->NewSequentialFile(f, r, soptions);
    if (s.ok() && count_sequential_reads_) {
      r->reset(new CountingFile(std::move(*r), &sequential_read_counter_));
    }
    return s;
  }

I
Igor Sugak 已提交
359
  virtual void SleepForMicroseconds(int micros) override {
360 361 362
    sleep_counter_.Increment();
    target()->SleepForMicroseconds(micros);
  }
363 364 365 366 367 368 369 370

  virtual Status GetCurrentTime(int64_t* unix_time) override {
    Status s = target()->GetCurrentTime(unix_time);
    if (s.ok()) {
      *unix_time += addon_time_;
    }
    return s;
  }
371 372 373 374

  virtual uint64_t NowNanos() override {
    return target()->NowNanos() + addon_time_ * 1000;
  }
375 376
};

I
Igor Sugak 已提交
377
class DBTest : public testing::Test {
J
Jim Paton 已提交
378
 protected:
S
Sanjay Ghemawat 已提交
379 380
  // Sequence of option configurations to try
  enum OptionConfig {
S
Stanislau Hlebik 已提交
381 382 383 384
    kDefault = 0,
    kBlockBasedTableWithPrefixHashIndex = 1,
    kBlockBasedTableWithWholeKeyHashIndex = 2,
    kPlainTableFirstBytePrefix = 3,
385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
    kPlainTableCappedPrefix = 4,
    kPlainTableAllBytesPrefix = 5,
    kVectorRep = 6,
    kHashLinkList = 7,
    kHashCuckoo = 8,
    kMergePut = 9,
    kFilter = 10,
    kFullFilter = 11,
    kUncompressed = 12,
    kNumLevel_3 = 13,
    kDBLogDir = 14,
    kWalDirAndMmapReads = 15,
    kManifestFileSize = 16,
    kCompactOnFlush = 17,
    kPerfOptions = 18,
    kDeletesFilterFirst = 19,
    kHashSkipList = 20,
    kUniversalCompaction = 21,
S
sdong 已提交
403 404 405 406 407 408 409
    kUniversalCompactionMultiLevel = 22,
    kCompressedBlockCache = 23,
    kInfiniteMaxOpenFiles = 24,
    kxxHashChecksum = 25,
    kFIFOCompaction = 26,
    kOptimizeFiltersForHits = 27,
    kEnd = 28
S
Sanjay Ghemawat 已提交
410 411 412
  };
  int option_config_;

J
jorlow@chromium.org 已提交
413 414
 public:
  std::string dbname_;
415
  std::string alternative_wal_dir_;
416
  MockEnv* mem_env_;
417
  SpecialEnv* env_;
J
jorlow@chromium.org 已提交
418
  DB* db_;
419
  std::vector<ColumnFamilyHandle*> handles_;
J
jorlow@chromium.org 已提交
420 421 422

  Options last_options_;

423 424 425 426 427
  // Skip some options, as they may not be applicable to a specific test.
  // To add more skip constants, use values 4, 8, 16, etc.
  enum OptionSkip {
    kNoSkip = 0,
    kSkipDeletesFilterFirst = 1,
428
    kSkipUniversalCompaction = 2,
429
    kSkipMergePut = 4,
430
    kSkipPlainTable = 8,
431
    kSkipHashIndex = 16,
432
    kSkipNoSeekToLast = 32,
I
Igor Canadi 已提交
433 434
    kSkipHashCuckoo = 64,
    kSkipFIFOCompaction = 128,
I
Igor Canadi 已提交
435
    kSkipMmapReads = 256,
436 437
  };

438

S
Sanjay Ghemawat 已提交
439
  DBTest() : option_config_(kDefault),
440 441 442
             mem_env_(!getenv("MEM_ENV") ? nullptr :
                                           new MockEnv(Env::Default())),
             env_(new SpecialEnv(mem_env_ ? mem_env_ : Env::Default())) {
443 444
    env_->SetBackgroundThreads(1, Env::LOW);
    env_->SetBackgroundThreads(1, Env::HIGH);
445
    dbname_ = test::TmpDir(env_) + "/db_test";
446
    alternative_wal_dir_ = dbname_ + "/wal";
447
    auto options = CurrentOptions();
448 449 450 451
    auto delete_options = options;
    delete_options.wal_dir = alternative_wal_dir_;
    EXPECT_OK(DestroyDB(dbname_, delete_options));
    // Destroy it for not alternative WAL dir is used.
452
    EXPECT_OK(DestroyDB(dbname_, options));
A
Abhishek Kona 已提交
453
    db_ = nullptr;
L
Lei Jin 已提交
454
    Reopen(options);
J
jorlow@chromium.org 已提交
455 456 457
  }

  ~DBTest() {
458 459 460
    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
    rocksdb::SyncPoint::GetInstance()->LoadDependency({});
    rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
461
    Close();
462
    Options options;
463 464 465 466
    options.db_paths.emplace_back(dbname_, 0);
    options.db_paths.emplace_back(dbname_ + "_2", 0);
    options.db_paths.emplace_back(dbname_ + "_3", 0);
    options.db_paths.emplace_back(dbname_ + "_4", 0);
467
    EXPECT_OK(DestroyDB(dbname_, options));
468
    delete env_;
S
Sanjay Ghemawat 已提交
469 470 471 472
  }

  // Switch to a fresh database with the next option configuration to
  // test.  Return false if there are no more configurations to test.
473
  bool ChangeOptions(int skip_mask = kNoSkip) {
474 475 476 477 478 479
    for(option_config_++; option_config_ < kEnd; option_config_++) {
      if ((skip_mask & kSkipDeletesFilterFirst) &&
          option_config_ == kDeletesFilterFirst) {
        continue;
      }
      if ((skip_mask & kSkipUniversalCompaction) &&
S
sdong 已提交
480 481
          (option_config_ == kUniversalCompaction ||
           option_config_ == kUniversalCompactionMultiLevel)) {
482 483 484 485 486
        continue;
      }
      if ((skip_mask & kSkipMergePut) && option_config_ == kMergePut) {
        continue;
      }
487 488 489 490 491
      if ((skip_mask & kSkipNoSeekToLast) &&
          (option_config_ == kHashLinkList ||
           option_config_ == kHashSkipList)) {;
        continue;
      }
492 493 494 495
      if ((skip_mask & kSkipPlainTable) &&
          (option_config_ == kPlainTableAllBytesPrefix ||
           option_config_ == kPlainTableFirstBytePrefix ||
           option_config_ == kPlainTableCappedPrefix)) {
496 497
        continue;
      }
S
Stanislau Hlebik 已提交
498
      if ((skip_mask & kSkipHashIndex) &&
499 500 501 502
          (option_config_ == kBlockBasedTableWithPrefixHashIndex ||
           option_config_ == kBlockBasedTableWithWholeKeyHashIndex)) {
        continue;
      }
503 504 505
      if ((skip_mask & kSkipHashCuckoo) && (option_config_ == kHashCuckoo)) {
        continue;
      }
I
Igor Canadi 已提交
506 507 508 509
      if ((skip_mask & kSkipFIFOCompaction) &&
          option_config_ == kFIFOCompaction) {
        continue;
      }
I
Igor Canadi 已提交
510 511 512 513
      if ((skip_mask & kSkipMmapReads) &&
          option_config_ == kWalDirAndMmapReads) {
        continue;
      }
514
      break;
515
    }
516

H
heyongqiang 已提交
517
    if (option_config_ >= kEnd) {
L
Lei Jin 已提交
518
      Destroy(last_options_);
S
Sanjay Ghemawat 已提交
519 520
      return false;
    } else {
L
Lei Jin 已提交
521 522 523
      auto options = CurrentOptions();
      options.create_if_missing = true;
      DestroyAndReopen(options);
S
Sanjay Ghemawat 已提交
524 525 526 527
      return true;
    }
  }

528
  // Switch between different compaction styles (we have only 2 now).
529
  bool ChangeCompactOptions() {
530 531
    if (option_config_ == kDefault) {
      option_config_ = kUniversalCompaction;
532
      Destroy(last_options_);
L
Lei Jin 已提交
533 534
      auto options = CurrentOptions();
      options.create_if_missing = true;
L
Lei Jin 已提交
535
      TryReopen(options);
536
      return true;
S
sdong 已提交
537 538 539 540 541 542 543
    } else if (option_config_ == kUniversalCompaction) {
      option_config_ = kUniversalCompactionMultiLevel;
      Destroy(last_options_);
      auto options = CurrentOptions();
      options.create_if_missing = true;
      TryReopen(options);
      return true;
544 545 546 547 548
    } else {
      return false;
    }
  }

549 550
  // Switch between different filter policy
  // Jump from kDefault to kFilter to kFullFilter
551
  bool ChangeFilterOptions() {
552 553 554 555 556 557 558
    if (option_config_ == kDefault) {
      option_config_ = kFilter;
    } else if (option_config_ == kFilter) {
      option_config_ = kFullFilter;
    } else {
      return false;
    }
559
    Destroy(last_options_);
L
Lei Jin 已提交
560 561 562

    auto options = CurrentOptions();
    options.create_if_missing = true;
L
Lei Jin 已提交
563
    TryReopen(options);
L
Lei Jin 已提交
564
    return true;
565 566
  }

S
Sanjay Ghemawat 已提交
567
  // Return the current option configuration.
568 569
  Options CurrentOptions(
      const anon::OptionsOverride& options_override = anon::OptionsOverride()) {
S
Sanjay Ghemawat 已提交
570
    Options options;
571
    return CurrentOptions(options, options_override);
572 573
  }

574 575 576
  Options CurrentOptions(
      const Options& defaultOptions,
      const anon::OptionsOverride& options_override = anon::OptionsOverride()) {
577 578
    // this redudant copy is to minimize code change w/o having lint error.
    Options options = defaultOptions;
579 580 581
    XFUNC_TEST("", "dbtest_options", inplace_options1, GetXFTestOptions,
               reinterpret_cast<Options*>(&options),
               options_override.skip_policy);
582 583
    BlockBasedTableOptions table_options;
    bool set_block_based_table_factory = true;
S
Sanjay Ghemawat 已提交
584
    switch (option_config_) {
I
Igor Canadi 已提交
585
      case kHashSkipList:
586
        options.prefix_extractor.reset(NewFixedPrefixTransform(1));
587 588
        options.memtable_factory.reset(
            NewHashSkipListRepFactory(16));
J
Jim Paton 已提交
589
        break;
590 591
      case kPlainTableFirstBytePrefix:
        options.table_factory.reset(new PlainTableFactory());
592
        options.prefix_extractor.reset(NewFixedPrefixTransform(1));
593 594
        options.allow_mmap_reads = true;
        options.max_sequential_skip_in_iterations = 999999;
595
        set_block_based_table_factory = false;
596 597 598 599 600 601 602
        break;
      case kPlainTableCappedPrefix:
        options.table_factory.reset(new PlainTableFactory());
        options.prefix_extractor.reset(NewCappedPrefixTransform(8));
        options.allow_mmap_reads = true;
        options.max_sequential_skip_in_iterations = 999999;
        set_block_based_table_factory = false;
603 604 605
        break;
      case kPlainTableAllBytesPrefix:
        options.table_factory.reset(new PlainTableFactory());
606
        options.prefix_extractor.reset(NewNoopTransform());
607 608
        options.allow_mmap_reads = true;
        options.max_sequential_skip_in_iterations = 999999;
609
        set_block_based_table_factory = false;
610
        break;
611
      case kMergePut:
612
        options.merge_operator = MergeOperators::CreatePutOperator();
613
        break;
S
Sanjay Ghemawat 已提交
614
      case kFilter:
615 616 617 618
        table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
        break;
      case kFullFilter:
        table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
S
Sanjay Ghemawat 已提交
619
        break;
H
heyongqiang 已提交
620
      case kUncompressed:
H
heyongqiang 已提交
621 622
        options.compression = kNoCompression;
        break;
623
      case kNumLevel_3:
H
heyongqiang 已提交
624 625 626
        options.num_levels = 3;
        break;
      case kDBLogDir:
627
        options.db_log_dir = test::TmpDir(env_);
H
heyongqiang 已提交
628
        break;
I
Igor Canadi 已提交
629
      case kWalDirAndMmapReads:
630
        options.wal_dir = alternative_wal_dir_;
I
Igor Canadi 已提交
631 632 633
        // mmap reads should be orthogonal to WalDir setting, so we piggyback to
        // this option config to test mmap reads as well
        options.allow_mmap_reads = true;
634
        break;
A
Abhishek Kona 已提交
635 636
      case kManifestFileSize:
        options.max_manifest_file_size = 50; // 50 bytes
637
      case kCompactOnFlush:
N
Naman Gupta 已提交
638 639
        options.purge_redundant_kvs_while_flush =
          !options.purge_redundant_kvs_while_flush;
640 641
        break;
      case kPerfOptions:
J
Jim Paton 已提交
642 643
        options.hard_rate_limit = 2.0;
        options.rate_limit_delay_max_milliseconds = 2;
644 645
        // TODO -- test more options
        break;
646
      case kDeletesFilterFirst:
647
        options.filter_deletes = true;
648
        break;
J
Jim Paton 已提交
649
      case kVectorRep:
650
        options.memtable_factory.reset(new VectorRepFactory(100));
J
Jim Paton 已提交
651
        break;
652
      case kHashLinkList:
653
        options.prefix_extractor.reset(NewFixedPrefixTransform(1));
654 655
        options.memtable_factory.reset(
            NewHashLinkListRepFactory(4, 0, 3, true, 4));
656
        break;
657 658 659 660
      case kHashCuckoo:
        options.memtable_factory.reset(
            NewHashCuckooRepFactory(options.write_buffer_size));
        break;
661 662
      case kUniversalCompaction:
        options.compaction_style = kCompactionStyleUniversal;
S
sdong 已提交
663 664 665 666 667
        options.num_levels = 1;
        break;
      case kUniversalCompactionMultiLevel:
        options.compaction_style = kCompactionStyleUniversal;
        options.num_levels = 8;
668
        break;
669
      case kCompressedBlockCache:
S
sdong 已提交
670
        options.allow_mmap_writes = true;
671
        table_options.block_cache_compressed = NewLRUCache(8*1024*1024);
672
        break;
673 674 675
      case kInfiniteMaxOpenFiles:
        options.max_open_files = -1;
        break;
I
xxHash  
Igor Canadi 已提交
676 677 678 679
      case kxxHashChecksum: {
        table_options.checksum = kxxHash;
        break;
      }
I
Igor Canadi 已提交
680 681 682 683
      case kFIFOCompaction: {
        options.compaction_style = kCompactionStyleFIFO;
        break;
      }
684 685 686 687 688 689 690 691 692 693
      case kBlockBasedTableWithPrefixHashIndex: {
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        options.prefix_extractor.reset(NewFixedPrefixTransform(1));
        break;
      }
      case kBlockBasedTableWithWholeKeyHashIndex: {
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        options.prefix_extractor.reset(NewNoopTransform());
        break;
      }
694 695 696 697 698 699
      case kOptimizeFiltersForHits: {
        options.optimize_filters_for_hits = true;
        set_block_based_table_factory = true;
        break;
      }

S
Sanjay Ghemawat 已提交
700 701 702
      default:
        break;
    }
703 704 705 706 707 708 709

    if (options_override.filter_policy) {
      table_options.filter_policy = options_override.filter_policy;
    }
    if (set_block_based_table_factory) {
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    }
710 711
    options.env = env_;
    options.create_if_missing = true;
S
Sanjay Ghemawat 已提交
712
    return options;
J
jorlow@chromium.org 已提交
713 714 715 716 717 718
  }

  DBImpl* dbfull() {
    return reinterpret_cast<DBImpl*>(db_);
  }

719
  void CreateColumnFamilies(const std::vector<std::string>& cfs,
L
Lei Jin 已提交
720 721
                            const Options& options) {
    ColumnFamilyOptions cf_opts(options);
722
    size_t cfi = handles_.size();
723 724 725 726 727 728 729
    handles_.resize(cfi + cfs.size());
    for (auto cf : cfs) {
      ASSERT_OK(db_->CreateColumnFamily(cf_opts, cf, &handles_[cfi++]));
    }
  }

  void CreateAndReopenWithCF(const std::vector<std::string>& cfs,
L
Lei Jin 已提交
730
                             const Options& options) {
731 732
    CreateColumnFamilies(cfs, options);
    std::vector<std::string> cfs_plus_default = cfs;
733
    cfs_plus_default.insert(cfs_plus_default.begin(), kDefaultColumnFamilyName);
L
Lei Jin 已提交
734
    ReopenWithColumnFamilies(cfs_plus_default, options);
735 736
  }

737
  void ReopenWithColumnFamilies(const std::vector<std::string>& cfs,
L
Lei Jin 已提交
738
                                const std::vector<Options>& options) {
739 740 741
    ASSERT_OK(TryReopenWithColumnFamilies(cfs, options));
  }

742
  void ReopenWithColumnFamilies(const std::vector<std::string>& cfs,
L
Lei Jin 已提交
743
                                const Options& options) {
744 745 746 747 748
    ASSERT_OK(TryReopenWithColumnFamilies(cfs, options));
  }

  Status TryReopenWithColumnFamilies(
      const std::vector<std::string>& cfs,
L
Lei Jin 已提交
749
      const std::vector<Options>& options) {
750
    Close();
751
    EXPECT_EQ(cfs.size(), options.size());
752 753
    std::vector<ColumnFamilyDescriptor> column_families;
    for (size_t i = 0; i < cfs.size(); ++i) {
L
Lei Jin 已提交
754
      column_families.push_back(ColumnFamilyDescriptor(cfs[i], options[i]));
755
    }
L
Lei Jin 已提交
756
    DBOptions db_opts = DBOptions(options[0]);
757 758 759 760
    return DB::Open(db_opts, dbname_, column_families, &handles_, &db_);
  }

  Status TryReopenWithColumnFamilies(const std::vector<std::string>& cfs,
L
Lei Jin 已提交
761
                                     const Options& options) {
762
    Close();
L
Lei Jin 已提交
763
    std::vector<Options> v_opts(cfs.size(), options);
764 765 766
    return TryReopenWithColumnFamilies(cfs, v_opts);
  }

L
Lei Jin 已提交
767
  void Reopen(const Options& options) {
L
Lei Jin 已提交
768
    ASSERT_OK(TryReopen(options));
J
jorlow@chromium.org 已提交
769 770
  }

S
Sanjay Ghemawat 已提交
771
  void Close() {
772 773 774 775
    for (auto h : handles_) {
      delete h;
    }
    handles_.clear();
S
Sanjay Ghemawat 已提交
776
    delete db_;
A
Abhishek Kona 已提交
777
    db_ = nullptr;
S
Sanjay Ghemawat 已提交
778 779
  }

L
Lei Jin 已提交
780
  void DestroyAndReopen(const Options& options) {
N
Naman Gupta 已提交
781
    //Destroy using last options
L
Lei Jin 已提交
782
    Destroy(last_options_);
L
Lei Jin 已提交
783
    ASSERT_OK(TryReopen(options));
N
Naman Gupta 已提交
784 785
  }

L
Lei Jin 已提交
786
  void Destroy(const Options& options) {
787
    Close();
L
Lei Jin 已提交
788
    ASSERT_OK(DestroyDB(dbname_, options));
J
jorlow@chromium.org 已提交
789 790
  }

791 792
  Status ReadOnlyReopen(const Options& options) {
    return DB::OpenForReadOnly(options, dbname_, &db_);
793 794
  }

L
Lei Jin 已提交
795
  Status TryReopen(const Options& options) {
796
    Close();
L
Lei Jin 已提交
797 798
    last_options_ = options;
    return DB::Open(options, dbname_, &db_);
J
jorlow@chromium.org 已提交
799 800
  }

801 802 803 804 805 806 807 808
  Status Flush(int cf = 0) {
    if (cf == 0) {
      return db_->Flush(FlushOptions());
    } else {
      return db_->Flush(FlushOptions(), handles_[cf]);
    }
  }

809
  Status Put(const Slice& k, const Slice& v, WriteOptions wo = WriteOptions()) {
810
    if (kMergePut == option_config_ ) {
811
      return db_->Merge(wo, k, v);
812
    } else {
813
      return db_->Put(wo, k, v);
814
    }
J
jorlow@chromium.org 已提交
815 816
  }

817 818 819 820 821 822 823 824 825
  Status Put(int cf, const Slice& k, const Slice& v,
             WriteOptions wo = WriteOptions()) {
    if (kMergePut == option_config_) {
      return db_->Merge(wo, handles_[cf], k, v);
    } else {
      return db_->Put(wo, handles_[cf], k, v);
    }
  }

J
jorlow@chromium.org 已提交
826
  Status Delete(const std::string& k) {
827
    return db_->Delete(WriteOptions(), k);
J
jorlow@chromium.org 已提交
828 829
  }

830 831 832 833
  Status Delete(int cf, const std::string& k) {
    return db_->Delete(WriteOptions(), handles_[cf], k);
  }

A
Abhishek Kona 已提交
834
  std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) {
J
jorlow@chromium.org 已提交
835
    ReadOptions options;
836
    options.verify_checksums = true;
J
jorlow@chromium.org 已提交
837 838 839 840 841 842 843 844 845 846 847
    options.snapshot = snapshot;
    std::string result;
    Status s = db_->Get(options, k, &result);
    if (s.IsNotFound()) {
      result = "NOT_FOUND";
    } else if (!s.ok()) {
      result = s.ToString();
    }
    return result;
  }

848 849 850 851 852 853 854 855 856 857 858 859 860 861 862
  std::string Get(int cf, const std::string& k,
                  const Snapshot* snapshot = nullptr) {
    ReadOptions options;
    options.verify_checksums = true;
    options.snapshot = snapshot;
    std::string result;
    Status s = db_->Get(options, handles_[cf], k, &result);
    if (s.IsNotFound()) {
      result = "NOT_FOUND";
    } else if (!s.ok()) {
      result = s.ToString();
    }
    return result;
  }

863 864
  uint64_t GetNumSnapshots() {
    uint64_t int_num;
865
    EXPECT_TRUE(dbfull()->GetIntProperty("rocksdb.num-snapshots", &int_num));
866 867 868 869 870
    return int_num;
  }

  uint64_t GetTimeOldestSnapshots() {
    uint64_t int_num;
871
    EXPECT_TRUE(
872 873 874 875
        dbfull()->GetIntProperty("rocksdb.oldest-snapshot-time", &int_num));
    return int_num;
  }

H
Hans Wennborg 已提交
876 877
  // Return a string that contains all key,value pairs in order,
  // formatted like "(k1->v1)(k2->v2)".
878
  std::string Contents(int cf = 0) {
H
Hans Wennborg 已提交
879 880
    std::vector<std::string> forward;
    std::string result;
881 882
    Iterator* iter = (cf == 0) ? db_->NewIterator(ReadOptions())
                               : db_->NewIterator(ReadOptions(), handles_[cf]);
H
Hans Wennborg 已提交
883 884 885 886 887 888 889 890 891
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      std::string s = IterStatus(iter);
      result.push_back('(');
      result.append(s);
      result.push_back(')');
      forward.push_back(s);
    }

    // Check reverse iteration results are the reverse of forward results
892
    unsigned int matched = 0;
H
Hans Wennborg 已提交
893
    for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
894 895
      EXPECT_LT(matched, forward.size());
      EXPECT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]);
H
Hans Wennborg 已提交
896 897
      matched++;
    }
898
    EXPECT_EQ(matched, forward.size());
H
Hans Wennborg 已提交
899 900 901 902 903

    delete iter;
    return result;
  }

904
  std::string AllEntriesFor(const Slice& user_key, int cf = 0) {
905
    Arena arena;
906
    ScopedArenaIterator iter;
907
    if (cf == 0) {
908
      iter.set(dbfull()->TEST_NewInternalIterator(&arena));
909
    } else {
910
      iter.set(dbfull()->TEST_NewInternalIterator(&arena, handles_[cf]));
911
    }
J
jorlow@chromium.org 已提交
912 913 914 915 916 917 918 919 920
    InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
    iter->Seek(target.Encode());
    std::string result;
    if (!iter->status().ok()) {
      result = iter->status().ToString();
    } else {
      result = "[ ";
      bool first = true;
      while (iter->Valid()) {
I
Igor Canadi 已提交
921
        ParsedInternalKey ikey(Slice(), 0, kTypeValue);
J
jorlow@chromium.org 已提交
922 923 924
        if (!ParseInternalKey(iter->key(), &ikey)) {
          result += "CORRUPTED";
        } else {
S
Sanjay Ghemawat 已提交
925
          if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) {
J
jorlow@chromium.org 已提交
926 927 928 929 930 931 932 933 934 935
            break;
          }
          if (!first) {
            result += ", ";
          }
          first = false;
          switch (ikey.type) {
            case kTypeValue:
              result += iter->value().ToString();
              break;
936 937 938 939
            case kTypeMerge:
              // keep it the same as kTypeValue for testing kMergePut
              result += iter->value().ToString();
              break;
J
jorlow@chromium.org 已提交
940 941 942
            case kTypeDeletion:
              result += "DEL";
              break;
943
            default:
J
Jim Paton 已提交
944 945
              assert(false);
              break;
J
jorlow@chromium.org 已提交
946 947 948 949 950 951 952 953 954 955 956 957
          }
        }
        iter->Next();
      }
      if (!first) {
        result += " ";
      }
      result += "]";
    }
    return result;
  }

S
sdong 已提交
958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983
  int NumSortedRuns(int cf = 0) {
    ColumnFamilyMetaData cf_meta;
    if (cf == 0) {
      db_->GetColumnFamilyMetaData(&cf_meta);
    } else {
      db_->GetColumnFamilyMetaData(handles_[cf], &cf_meta);
    }
    int num_sr = static_cast<int>(cf_meta.levels[0].files.size());
    for (size_t i = 1U; i < cf_meta.levels.size(); i++) {
      if (cf_meta.levels[i].files.size() > 0) {
        num_sr++;
      }
    }
    return num_sr;
  }

  uint64_t TotalSize(int cf = 0) {
    ColumnFamilyMetaData cf_meta;
    if (cf == 0) {
      db_->GetColumnFamilyMetaData(&cf_meta);
    } else {
      db_->GetColumnFamilyMetaData(handles_[cf], &cf_meta);
    }
    return cf_meta.size;
  }

984
  int NumTableFilesAtLevel(int level, int cf = 0) {
985
    std::string property;
986 987
    if (cf == 0) {
      // default cfd
988
      EXPECT_TRUE(db_->GetProperty(
989 990
          "rocksdb.num-files-at-level" + NumberToString(level), &property));
    } else {
991
      EXPECT_TRUE(db_->GetProperty(
992 993 994
          handles_[cf], "rocksdb.num-files-at-level" + NumberToString(level),
          &property));
    }
995
    return atoi(property.c_str());
J
jorlow@chromium.org 已提交
996 997
  }

998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009
  uint64_t SizeAtLevel(int level) {
    std::vector<LiveFileMetaData> metadata;
    db_->GetLiveFilesMetaData(&metadata);
    uint64_t sum = 0;
    for (const auto& m : metadata) {
      if (m.level == level) {
        sum += m.size;
      }
    }
    return sum;
  }

S
sdong 已提交
1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023
  int TotalLiveFiles(int cf = 0) {
    ColumnFamilyMetaData cf_meta;
    if (cf == 0) {
      db_->GetColumnFamilyMetaData(&cf_meta);
    } else {
      db_->GetColumnFamilyMetaData(handles_[cf], &cf_meta);
    }
    int num_files = 0;
    for (auto& level : cf_meta.levels) {
      num_files += level.files.size();
    }
    return num_files;
  }

1024 1025 1026 1027
  int TotalTableFiles(int cf = 0, int levels = -1) {
    if (levels == -1) {
      levels = CurrentOptions().num_levels;
    }
1028
    int result = 0;
1029 1030
    for (int level = 0; level < levels; level++) {
      result += NumTableFilesAtLevel(level, cf);
1031 1032 1033 1034
    }
    return result;
  }

G
Gabor Cselle 已提交
1035
  // Return spread of files per level
1036 1037 1038
  std::string FilesPerLevel(int cf = 0) {
    int num_levels =
        (cf == 0) ? db_->NumberLevels() : db_->NumberLevels(handles_[1]);
G
Gabor Cselle 已提交
1039
    std::string result;
1040
    size_t last_non_zero_offset = 0;
1041 1042
    for (int level = 0; level < num_levels; level++) {
      int f = NumTableFilesAtLevel(level, cf);
G
Gabor Cselle 已提交
1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053
      char buf[100];
      snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
      result += buf;
      if (f > 0) {
        last_non_zero_offset = result.size();
      }
    }
    result.resize(last_non_zero_offset);
    return result;
  }

1054
  size_t CountFiles() {
1055 1056
    std::vector<std::string> files;
    env_->GetChildren(dbname_, &files);
1057 1058 1059 1060 1061 1062

    std::vector<std::string> logfiles;
    if (dbname_ != last_options_.wal_dir) {
      env_->GetChildren(last_options_.wal_dir, &logfiles);
    }

1063
    return files.size() + logfiles.size();
1064 1065
  }

1066
  size_t CountLiveFiles() {
1067 1068 1069
    std::vector<LiveFileMetaData> metadata;
    db_->GetLiveFilesMetaData(&metadata);
    return metadata.size();
1070 1071
  }

1072
  uint64_t Size(const Slice& start, const Slice& limit, int cf = 0) {
J
jorlow@chromium.org 已提交
1073 1074
    Range r(start, limit);
    uint64_t size;
1075 1076 1077 1078 1079
    if (cf == 0) {
      db_->GetApproximateSizes(&r, 1, &size);
    } else {
      db_->GetApproximateSizes(handles_[1], &r, 1, &size);
    }
J
jorlow@chromium.org 已提交
1080 1081 1082
    return size;
  }

1083 1084 1085 1086 1087 1088
  void Compact(int cf, const Slice& start, const Slice& limit,
               uint32_t target_path_id) {
    ASSERT_OK(db_->CompactRange(handles_[cf], &start, &limit, false, -1,
                                target_path_id));
  }

1089 1090 1091 1092
  void Compact(int cf, const Slice& start, const Slice& limit) {
    ASSERT_OK(db_->CompactRange(handles_[cf], &start, &limit));
  }

1093
  void Compact(const Slice& start, const Slice& limit) {
1094
    ASSERT_OK(db_->CompactRange(&start, &limit));
G
Gabor Cselle 已提交
1095 1096 1097 1098
  }

  // Do n memtable compactions, each of which produces an sstable
  // covering the range [small,large].
1099 1100
  void MakeTables(int n, const std::string& small, const std::string& large,
                  int cf = 0) {
G
Gabor Cselle 已提交
1101
    for (int i = 0; i < n; i++) {
1102 1103 1104
      ASSERT_OK(Put(cf, small, "begin"));
      ASSERT_OK(Put(cf, large, "end"));
      ASSERT_OK(Flush(cf));
1105 1106 1107
    }
  }

1108 1109
  // Prevent pushing of new sstables into deeper levels by adding
  // tables that cover a specified range to all levels.
1110 1111 1112
  void FillLevels(const std::string& smallest, const std::string& largest,
                  int cf) {
    MakeTables(db_->NumberLevels(handles_[cf]), smallest, largest, cf);
1113 1114
  }

1115 1116 1117 1118 1119
  void DumpFileCounts(const char* label) {
    fprintf(stderr, "---\n%s:\n", label);
    fprintf(stderr, "maxoverlap: %lld\n",
            static_cast<long long>(
                dbfull()->TEST_MaxNextLevelOverlappingBytes()));
1120
    for (int level = 0; level < db_->NumberLevels(); level++) {
1121 1122 1123 1124 1125 1126
      int num = NumTableFilesAtLevel(level);
      if (num > 0) {
        fprintf(stderr, "  level %3d : %d files\n", level, num);
      }
    }
  }
J
jorlow@chromium.org 已提交
1127

G
Gabor Cselle 已提交
1128 1129
  std::string DumpSSTableList() {
    std::string property;
1130
    db_->GetProperty("rocksdb.sstables", &property);
G
Gabor Cselle 已提交
1131 1132 1133
    return property;
  }

1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148
  int GetSstFileCount(std::string path) {
    std::vector<std::string> files;
    env_->GetChildren(path, &files);

    int sst_count = 0;
    uint64_t number;
    FileType type;
    for (size_t i = 0; i < files.size(); i++) {
      if (ParseFileName(files[i], &number, &type) && type == kTableFile) {
        sst_count++;
      }
    }
    return sst_count;
  }

1149
  // this will generate non-overlapping files since it keeps increasing key_idx
1150
  void GenerateNewFile(Random* rnd, int* key_idx, bool nowait = false) {
1151 1152 1153 1154
    for (int i = 0; i < 11; i++) {
      ASSERT_OK(Put(Key(*key_idx), RandomString(rnd, (i == 10) ? 1 : 10000)));
      (*key_idx)++;
    }
1155 1156 1157 1158
    if (!nowait) {
      dbfull()->TEST_WaitForFlushMemTable();
      dbfull()->TEST_WaitForCompact();
    }
1159 1160
  }

1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171
  void GenerateNewRandomFile(Random* rnd, bool nowait = false) {
    for (int i = 0; i < 100; i++) {
      ASSERT_OK(Put("key" + RandomString(rnd, 7), RandomString(rnd, 1000)));
    }
    ASSERT_OK(Put("key" + RandomString(rnd, 7), RandomString(rnd, 1)));
    if (!nowait) {
      dbfull()->TEST_WaitForFlushMemTable();
      dbfull()->TEST_WaitForCompact();
    }
  }

J
jorlow@chromium.org 已提交
1172 1173 1174 1175 1176 1177 1178 1179 1180
  std::string IterStatus(Iterator* iter) {
    std::string result;
    if (iter->Valid()) {
      result = iter->key().ToString() + "->" + iter->value().ToString();
    } else {
      result = "(invalid)";
    }
    return result;
  }
1181 1182 1183 1184 1185 1186 1187 1188 1189

  Options OptionsForLogIterTest() {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.WAL_ttl_seconds = 1000;
    return options;
  }

  std::unique_ptr<TransactionLogIterator> OpenTransactionLogIter(
1190
      const SequenceNumber seq) {
1191 1192
    unique_ptr<TransactionLogIterator> iter;
    Status status = dbfull()->GetUpdatesSince(seq, &iter);
1193 1194
    EXPECT_OK(status);
    EXPECT_TRUE(iter->Valid());
1195 1196 1197 1198 1199 1200
    return std::move(iter);
  }

  std::string DummyString(size_t len, char c = 'a') {
    return std::string(len, c);
  }
1201

1202 1203
  void VerifyIterLast(std::string expected_key, int cf = 0) {
    Iterator* iter;
1204
    ReadOptions ro;
1205
    if (cf == 0) {
1206
      iter = db_->NewIterator(ro);
1207
    } else {
1208
      iter = db_->NewIterator(ro, handles_[cf]);
1209
    }
1210 1211 1212 1213
    iter->SeekToLast();
    ASSERT_EQ(IterStatus(iter), expected_key);
    delete iter;
  }
I
Igor Canadi 已提交
1214

1215 1216 1217 1218 1219
  // Used to test InplaceUpdate

  // If previous value is nullptr or delta is > than previous value,
  //   sets newValue with delta
  // If previous value is not empty,
1220 1221 1222 1223 1224
  //   updates previous value with 'b' string of previous value size - 1.
  static UpdateStatus
      updateInPlaceSmallerSize(char* prevValue, uint32_t* prevSize,
                               Slice delta, std::string* newValue) {
    if (prevValue == nullptr) {
1225
      *newValue = std::string(delta.size(), 'c');
1226
      return UpdateStatus::UPDATED;
1227
    } else {
1228 1229
      *prevSize = *prevSize - 1;
      std::string str_b = std::string(*prevSize, 'b');
1230
      memcpy(prevValue, str_b.c_str(), str_b.size());
1231
      return UpdateStatus::UPDATED_INPLACE;
1232 1233 1234
    }
  }

1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262
  static UpdateStatus
      updateInPlaceSmallerVarintSize(char* prevValue, uint32_t* prevSize,
                                     Slice delta, std::string* newValue) {
    if (prevValue == nullptr) {
      *newValue = std::string(delta.size(), 'c');
      return UpdateStatus::UPDATED;
    } else {
      *prevSize = 1;
      std::string str_b = std::string(*prevSize, 'b');
      memcpy(prevValue, str_b.c_str(), str_b.size());
      return UpdateStatus::UPDATED_INPLACE;
    }
  }

  static UpdateStatus
      updateInPlaceLargerSize(char* prevValue, uint32_t* prevSize,
                              Slice delta, std::string* newValue) {
    *newValue = std::string(delta.size(), 'c');
    return UpdateStatus::UPDATED;
  }

  static UpdateStatus
      updateInPlaceNoAction(char* prevValue, uint32_t* prevSize,
                            Slice delta, std::string* newValue) {
    return UpdateStatus::UPDATE_FAILED;
  }

  // Utility method to test InplaceUpdate
1263
  void validateNumberOfEntries(int numValues, int cf = 0) {
1264 1265
    ScopedArenaIterator iter;
    Arena arena;
1266
    if (cf != 0) {
1267
      iter.set(dbfull()->TEST_NewInternalIterator(&arena, handles_[cf]));
1268
    } else {
1269
      iter.set(dbfull()->TEST_NewInternalIterator(&arena));
1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283
    }
    iter->SeekToFirst();
    ASSERT_EQ(iter->status().ok(), true);
    int seq = numValues;
    while (iter->Valid()) {
      ParsedInternalKey ikey;
      ikey.sequence = -1;
      ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);

      // checks sequence number for updates
      ASSERT_EQ(ikey.sequence, (unsigned)seq--);
      iter->Next();
    }
    ASSERT_EQ(0, seq);
1284
  }
K
Kai Liu 已提交
1285

I
Igor Canadi 已提交
1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309
  void CopyFile(const std::string& source, const std::string& destination,
                uint64_t size = 0) {
    const EnvOptions soptions;
    unique_ptr<SequentialFile> srcfile;
    ASSERT_OK(env_->NewSequentialFile(source, &srcfile, soptions));
    unique_ptr<WritableFile> destfile;
    ASSERT_OK(env_->NewWritableFile(destination, &destfile, soptions));

    if (size == 0) {
      // default argument means copy everything
      ASSERT_OK(env_->GetFileSize(source, &size));
    }

    char buffer[4096];
    Slice slice;
    while (size > 0) {
      uint64_t one = std::min(uint64_t(sizeof(buffer)), size);
      ASSERT_OK(srcfile->Read(one, &slice, buffer));
      ASSERT_OK(destfile->Append(slice));
      size -= slice.size();
    }
    ASSERT_OK(destfile->Close());
  }

J
jorlow@chromium.org 已提交
1310 1311
};

I
Igor Canadi 已提交
1312 1313 1314 1315
static long TestGetTickerCount(const Options& options, Tickers ticker_type) {
  return options.statistics->getTickerCount(ticker_type);
}

1316 1317
// A helper function that ensures the table properties returned in
// `GetPropertiesOfAllTablesTest` is correct.
C
clark.kang 已提交
1318
// This test assumes entries size is different for each of the tables.
I
Igor Canadi 已提交
1319
namespace {
1320 1321 1322 1323
void VerifyTableProperties(DB* db, uint64_t expected_entries_size) {
  TablePropertiesCollection props;
  ASSERT_OK(db->GetPropertiesOfAllTables(&props));

I
Igor Canadi 已提交
1324
  ASSERT_EQ(4U, props.size());
1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336
  std::unordered_set<uint64_t> unique_entries;

  // Indirect test
  uint64_t sum = 0;
  for (const auto& item : props) {
    unique_entries.insert(item.second->num_entries);
    sum += item.second->num_entries;
  }

  ASSERT_EQ(props.size(), unique_entries.size());
  ASSERT_EQ(expected_entries_size, sum);
}
I
Igor Canadi 已提交
1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347

uint64_t GetNumberOfSstFilesForColumnFamily(DB* db,
                                            std::string column_family_name) {
  std::vector<LiveFileMetaData> metadata;
  db->GetLiveFilesMetaData(&metadata);
  uint64_t result = 0;
  for (auto& fileMetadata : metadata) {
    result += (fileMetadata.column_family_name == column_family_name);
  }
  return result;
}
I
Igor Canadi 已提交
1348
}  // namespace
1349

I
Igor Sugak 已提交
1350
TEST_F(DBTest, Empty) {
S
Sanjay Ghemawat 已提交
1351
  do {
1352
    Options options;
K
kailiu 已提交
1353 1354
    options.env = env_;
    options.write_buffer_size = 100000;  // Small write buffer
1355
    options = CurrentOptions(options);
L
Lei Jin 已提交
1356
    CreateAndReopenWithCF({"pikachu"}, options);
J
jorlow@chromium.org 已提交
1357

1358 1359 1360 1361 1362
    std::string num;
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-active-mem-table", &num));
    ASSERT_EQ("0", num);

1363 1364
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_EQ("v1", Get(1, "foo"));
1365 1366 1367
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-active-mem-table", &num));
    ASSERT_EQ("1", num);
K
kailiu 已提交
1368

I
Igor Canadi 已提交
1369 1370
    // Block sync calls
    env_->delay_sstable_sync_.store(true, std::memory_order_release);
1371
    Put(1, "k1", std::string(100000, 'x'));         // Fill memtable
1372 1373 1374 1375
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-active-mem-table", &num));
    ASSERT_EQ("2", num);

1376
    Put(1, "k2", std::string(100000, 'y'));         // Trigger compaction
1377 1378 1379 1380
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-active-mem-table", &num));
    ASSERT_EQ("1", num);

1381
    ASSERT_EQ("v1", Get(1, "foo"));
I
Igor Canadi 已提交
1382 1383
    // Release sync calls
    env_->delay_sstable_sync_.store(false, std::memory_order_release);
1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408

    ASSERT_OK(db_->DisableFileDeletions());
    ASSERT_TRUE(
        dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num));
    ASSERT_EQ("1", num);

    ASSERT_OK(db_->DisableFileDeletions());
    ASSERT_TRUE(
        dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num));
    ASSERT_EQ("2", num);

    ASSERT_OK(db_->DisableFileDeletions());
    ASSERT_TRUE(
        dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num));
    ASSERT_EQ("3", num);

    ASSERT_OK(db_->EnableFileDeletions(false));
    ASSERT_TRUE(
        dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num));
    ASSERT_EQ("2", num);

    ASSERT_OK(db_->EnableFileDeletions());
    ASSERT_TRUE(
        dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num));
    ASSERT_EQ("0", num);
S
Sanjay Ghemawat 已提交
1409
  } while (ChangeOptions());
J
jorlow@chromium.org 已提交
1410 1411
}

I
Igor Sugak 已提交
1412
TEST_F(DBTest, WriteEmptyBatch) {
1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432
  Options options;
  options.env = env_;
  options.write_buffer_size = 100000;
  options = CurrentOptions(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "foo", "bar"));
  env_->sync_counter_.store(0);
  WriteOptions wo;
  wo.sync = true;
  wo.disableWAL = false;
  WriteBatch empty_batch;
  ASSERT_OK(dbfull()->Write(wo, &empty_batch));
  ASSERT_GE(env_->sync_counter_.load(), 1);

  // make sure we can re-open it.
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
  ASSERT_EQ("bar", Get(1, "foo"));
}

I
Igor Sugak 已提交
1433
TEST_F(DBTest, ReadOnlyDB) {
1434 1435 1436 1437 1438
  ASSERT_OK(Put("foo", "v1"));
  ASSERT_OK(Put("bar", "v2"));
  ASSERT_OK(Put("foo", "v3"));
  Close();

1439 1440 1441
  auto options = CurrentOptions();
  assert(options.env = env_);
  ASSERT_OK(ReadOnlyReopen(options));
1442 1443 1444 1445 1446 1447 1448 1449 1450 1451
  ASSERT_EQ("v3", Get("foo"));
  ASSERT_EQ("v2", Get("bar"));
  Iterator* iter = db_->NewIterator(ReadOptions());
  int count = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ASSERT_OK(iter->status());
    ++count;
  }
  ASSERT_EQ(count, 2);
  delete iter;
1452 1453 1454
  Close();

  // Reopen and flush memtable.
L
Lei Jin 已提交
1455
  Reopen(options);
1456 1457 1458
  Flush();
  Close();
  // Now check keys in read only mode.
1459
  ASSERT_OK(ReadOnlyReopen(options));
1460 1461
  ASSERT_EQ("v3", Get("foo"));
  ASSERT_EQ("v2", Get("bar"));
1462 1463
}

I
Igor Sugak 已提交
1464
TEST_F(DBTest, CompactedDB) {
L
Lei Jin 已提交
1465 1466 1467 1468 1469 1470 1471 1472
  const uint64_t kFileSize = 1 << 20;
  Options options;
  options.disable_auto_compactions = true;
  options.max_mem_compaction_level = 0;
  options.write_buffer_size = kFileSize;
  options.target_file_size_base = kFileSize;
  options.max_bytes_for_level_base = 1 << 30;
  options.compression = kNoCompression;
1473
  options = CurrentOptions(options);
L
Lei Jin 已提交
1474
  Reopen(options);
L
Lei Jin 已提交
1475 1476 1477 1478
  // 1 L0 file, use CompactedDB if max_open_files = -1
  ASSERT_OK(Put("aaa", DummyString(kFileSize / 2, '1')));
  Flush();
  Close();
1479
  ASSERT_OK(ReadOnlyReopen(options));
L
Lei Jin 已提交
1480 1481 1482 1483 1484 1485
  Status s = Put("new", "value");
  ASSERT_EQ(s.ToString(),
            "Not implemented: Not supported operation in read only mode.");
  ASSERT_EQ(DummyString(kFileSize / 2, '1'), Get("aaa"));
  Close();
  options.max_open_files = -1;
1486
  ASSERT_OK(ReadOnlyReopen(options));
L
Lei Jin 已提交
1487 1488 1489 1490 1491
  s = Put("new", "value");
  ASSERT_EQ(s.ToString(),
            "Not implemented: Not supported in compacted db mode.");
  ASSERT_EQ(DummyString(kFileSize / 2, '1'), Get("aaa"));
  Close();
L
Lei Jin 已提交
1492
  Reopen(options);
L
Lei Jin 已提交
1493 1494 1495 1496 1497 1498
  // Add more L0 files
  ASSERT_OK(Put("bbb", DummyString(kFileSize / 2, '2')));
  Flush();
  ASSERT_OK(Put("aaa", DummyString(kFileSize / 2, 'a')));
  Flush();
  ASSERT_OK(Put("bbb", DummyString(kFileSize / 2, 'b')));
1499
  ASSERT_OK(Put("eee", DummyString(kFileSize / 2, 'e')));
L
Lei Jin 已提交
1500 1501 1502
  Flush();
  Close();

1503
  ASSERT_OK(ReadOnlyReopen(options));
L
Lei Jin 已提交
1504 1505 1506 1507 1508 1509 1510
  // Fallback to read-only DB
  s = Put("new", "value");
  ASSERT_EQ(s.ToString(),
            "Not implemented: Not supported operation in read only mode.");
  Close();

  // Full compaction
L
Lei Jin 已提交
1511
  Reopen(options);
L
Lei Jin 已提交
1512 1513 1514 1515 1516 1517 1518 1519 1520 1521
  // Add more keys
  ASSERT_OK(Put("fff", DummyString(kFileSize / 2, 'f')));
  ASSERT_OK(Put("hhh", DummyString(kFileSize / 2, 'h')));
  ASSERT_OK(Put("iii", DummyString(kFileSize / 2, 'i')));
  ASSERT_OK(Put("jjj", DummyString(kFileSize / 2, 'j')));
  db_->CompactRange(nullptr, nullptr);
  ASSERT_EQ(3, NumTableFilesAtLevel(1));
  Close();

  // CompactedDB
1522
  ASSERT_OK(ReadOnlyReopen(options));
L
Lei Jin 已提交
1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536
  s = Put("new", "value");
  ASSERT_EQ(s.ToString(),
            "Not implemented: Not supported in compacted db mode.");
  ASSERT_EQ("NOT_FOUND", Get("abc"));
  ASSERT_EQ(DummyString(kFileSize / 2, 'a'), Get("aaa"));
  ASSERT_EQ(DummyString(kFileSize / 2, 'b'), Get("bbb"));
  ASSERT_EQ("NOT_FOUND", Get("ccc"));
  ASSERT_EQ(DummyString(kFileSize / 2, 'e'), Get("eee"));
  ASSERT_EQ(DummyString(kFileSize / 2, 'f'), Get("fff"));
  ASSERT_EQ("NOT_FOUND", Get("ggg"));
  ASSERT_EQ(DummyString(kFileSize / 2, 'h'), Get("hhh"));
  ASSERT_EQ(DummyString(kFileSize / 2, 'i'), Get("iii"));
  ASSERT_EQ(DummyString(kFileSize / 2, 'j'), Get("jjj"));
  ASSERT_EQ("NOT_FOUND", Get("kkk"));
1537 1538 1539 1540 1541 1542 1543

  // MultiGet
  std::vector<std::string> values;
  std::vector<Status> status_list = dbfull()->MultiGet(ReadOptions(),
      std::vector<Slice>({Slice("aaa"), Slice("ccc"), Slice("eee"),
                          Slice("ggg"), Slice("iii"), Slice("kkk")}),
      &values);
1544 1545
  ASSERT_EQ(status_list.size(), static_cast<uint64_t>(6));
  ASSERT_EQ(values.size(), static_cast<uint64_t>(6));
1546 1547 1548 1549 1550 1551 1552 1553 1554
  ASSERT_OK(status_list[0]);
  ASSERT_EQ(DummyString(kFileSize / 2, 'a'), values[0]);
  ASSERT_TRUE(status_list[1].IsNotFound());
  ASSERT_OK(status_list[2]);
  ASSERT_EQ(DummyString(kFileSize / 2, 'e'), values[2]);
  ASSERT_TRUE(status_list[3].IsNotFound());
  ASSERT_OK(status_list[4]);
  ASSERT_EQ(DummyString(kFileSize / 2, 'i'), values[4]);
  ASSERT_TRUE(status_list[5].IsNotFound());
L
Lei Jin 已提交
1555 1556
}

K
Kai Liu 已提交
1557 1558
// Make sure that when options.block_cache is set, after a new table is
// created its index/filter blocks are added to block cache.
I
Igor Sugak 已提交
1559
TEST_F(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
K
Kai Liu 已提交
1560 1561 1562
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();
1563 1564
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
1565
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
1566
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
L
Lei Jin 已提交
1567
  CreateAndReopenWithCF({"pikachu"}, options);
K
Kai Liu 已提交
1568

1569 1570 1571
  ASSERT_OK(Put(1, "key", "val"));
  // Create a new table.
  ASSERT_OK(Flush(1));
K
Kai Liu 已提交
1572 1573

  // index/filter blocks added to block cache right after table creation.
I
Igor Canadi 已提交
1574 1575
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
K
Kai Liu 已提交
1576
  ASSERT_EQ(2, /* only index/filter were added */
I
Igor Canadi 已提交
1577 1578
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
1579 1580 1581 1582
  uint64_t int_num;
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_EQ(int_num, 0U);
K
Kai Liu 已提交
1583 1584 1585 1586

  // Make sure filter block is in cache.
  std::string value;
  ReadOptions ropt;
1587
  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
K
Kai Liu 已提交
1588 1589

  // Miss count should remain the same.
I
Igor Canadi 已提交
1590 1591
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
K
Kai Liu 已提交
1592

1593
  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
I
Igor Canadi 已提交
1594 1595
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
K
Kai Liu 已提交
1596 1597

  // Make sure index block is in cache.
I
Igor Canadi 已提交
1598
  auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
1599
  value = Get(1, "key");
I
Igor Canadi 已提交
1600
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
K
Kai Liu 已提交
1601
  ASSERT_EQ(index_block_hit + 1,
I
Igor Canadi 已提交
1602
            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
K
Kai Liu 已提交
1603

1604
  value = Get(1, "key");
I
Igor Canadi 已提交
1605
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
K
Kai Liu 已提交
1606
  ASSERT_EQ(index_block_hit + 2,
I
Igor Canadi 已提交
1607
            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
K
Kai Liu 已提交
1608 1609
}

1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653
TEST_F(DBTest, ParanoidFileChecks) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();
  options.level0_file_num_compaction_trigger = 2;
  options.paranoid_file_checks = true;
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = false;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "1_key", "val"));
  ASSERT_OK(Put(1, "9_key", "val"));
  // Create a new table.
  ASSERT_OK(Flush(1));
  ASSERT_EQ(1, /* read and cache data block */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));

  ASSERT_OK(Put(1, "1_key2", "val2"));
  ASSERT_OK(Put(1, "9_key2", "val2"));
  // Create a new SST file. This will further trigger a compaction
  // and generate another file.
  ASSERT_OK(Flush(1));
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(3, /* Totally 3 files created up to now */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));

  // After disabling options.paranoid_file_checks. NO further block
  // is added after generating a new file.
  ASSERT_OK(
      dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}}));

  ASSERT_OK(Put(1, "1_key3", "val3"));
  ASSERT_OK(Put(1, "9_key3", "val3"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(1, "1_key4", "val4"));
  ASSERT_OK(Put(1, "9_key4", "val4"));
  ASSERT_OK(Flush(1));
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(3, /* Totally 3 files created up to now */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
}

I
Igor Sugak 已提交
1654
TEST_F(DBTest, GetPropertiesOfAllTablesTest) {
1655
  Options options = CurrentOptions();
I
Igor Canadi 已提交
1656
  options.max_background_flushes = 0;
L
Lei Jin 已提交
1657
  Reopen(options);
1658 1659 1660
  // Create 4 tables
  for (int table = 0; table < 4; ++table) {
    for (int i = 0; i < 10 + table; ++i) {
1661
      db_->Put(WriteOptions(), ToString(table * 100 + i), "val");
1662 1663 1664 1665 1666
    }
    db_->Flush(FlushOptions());
  }

  // 1. Read table properties directly from file
L
Lei Jin 已提交
1667
  Reopen(options);
1668 1669 1670
  VerifyTableProperties(db_, 10 + 11 + 12 + 13);

  // 2. Put two tables to table cache and
L
Lei Jin 已提交
1671
  Reopen(options);
1672 1673 1674
  // fetch key from 1st and 2nd table, which will internally place that table to
  // the table cache.
  for (int i = 0; i < 2; ++i) {
1675
    Get(ToString(i * 100 + 0));
1676 1677 1678 1679 1680
  }

  VerifyTableProperties(db_, 10 + 11 + 12 + 13);

  // 3. Put all tables to table cache
L
Lei Jin 已提交
1681
  Reopen(options);
1682 1683 1684
  // fetch key from 1st and 2nd table, which will internally place that table to
  // the table cache.
  for (int i = 0; i < 4; ++i) {
1685
    Get(ToString(i * 100 + 0));
1686 1687 1688 1689
  }
  VerifyTableProperties(db_, 10 + 11 + 12 + 13);
}

1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761
class CoutingUserTblPropCollector : public TablePropertiesCollector {
 public:
  const char* Name() const override { return "CoutingUserTblPropCollector"; }

  Status Finish(UserCollectedProperties* properties) override {
    std::string encoded;
    PutVarint32(&encoded, count_);
    *properties = UserCollectedProperties{
        {"CoutingUserTblPropCollector", message_}, {"Count", encoded},
    };
    return Status::OK();
  }

  Status AddUserKey(const Slice& user_key, const Slice& value, EntryType type,
                    SequenceNumber seq, uint64_t file_size) override {
    ++count_;
    return Status::OK();
  }

  virtual UserCollectedProperties GetReadableProperties() const override {
    return UserCollectedProperties{};
  }

 private:
  std::string message_ = "Rocksdb";
  uint32_t count_ = 0;
};

class CoutingUserTblPropCollectorFactory
    : public TablePropertiesCollectorFactory {
 public:
  virtual TablePropertiesCollector* CreateTablePropertiesCollector() override {
    return new CoutingUserTblPropCollector();
  }
  const char* Name() const override {
    return "CoutingUserTblPropCollectorFactory";
  }
};

TEST_F(DBTest, GetUserDefinedTablaProperties) {
  Options options = CurrentOptions();
  options.max_background_flushes = 0;
  options.table_properties_collector_factories.resize(1);
  options.table_properties_collector_factories[0] =
      std::make_shared<CoutingUserTblPropCollectorFactory>();
  Reopen(options);
  // Create 4 tables
  for (int table = 0; table < 4; ++table) {
    for (int i = 0; i < 10 + table; ++i) {
      db_->Put(WriteOptions(), ToString(table * 100 + i), "val");
    }
    db_->Flush(FlushOptions());
  }

  TablePropertiesCollection props;
  ASSERT_OK(db_->GetPropertiesOfAllTables(&props));
  ASSERT_EQ(4U, props.size());
  uint32_t sum = 0;
  for (const auto& item : props) {
    auto& user_collected = item.second->user_collected_properties;
    ASSERT_TRUE(user_collected.find("CoutingUserTblPropCollector") !=
                user_collected.end());
    ASSERT_EQ(user_collected.at("CoutingUserTblPropCollector"), "Rocksdb");
    ASSERT_TRUE(user_collected.find("Count") != user_collected.end());
    Slice key(user_collected.at("Count"));
    uint32_t count;
    ASSERT_TRUE(GetVarint32(&key, &count));
    sum += count;
  }
  ASSERT_EQ(10u + 11u + 12u + 13u, sum);
}

I
Igor Sugak 已提交
1762
TEST_F(DBTest, LevelLimitReopen) {
1763
  Options options = CurrentOptions();
L
Lei Jin 已提交
1764
  CreateAndReopenWithCF({"pikachu"}, options);
1765 1766 1767

  const std::string value(1024 * 1024, ' ');
  int i = 0;
1768 1769
  while (NumTableFilesAtLevel(2, 1) == 0) {
    ASSERT_OK(Put(1, Key(i++), value));
1770 1771 1772
  }

  options.num_levels = 1;
1773
  options.max_bytes_for_level_multiplier_additional.resize(1, 1);
L
Lei Jin 已提交
1774
  Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
1775
  ASSERT_EQ(s.IsInvalidArgument(), true);
1776
  ASSERT_EQ(s.ToString(),
1777
            "Invalid argument: db has more levels than options.num_levels");
1778 1779

  options.num_levels = 10;
1780
  options.max_bytes_for_level_multiplier_additional.resize(10, 1);
L
Lei Jin 已提交
1781
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
1782 1783
}

I
Igor Sugak 已提交
1784
TEST_F(DBTest, PutDeleteGet) {
S
Sanjay Ghemawat 已提交
1785
  do {
L
Lei Jin 已提交
1786
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
1787 1788 1789 1790 1791 1792
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_OK(Put(1, "foo", "v2"));
    ASSERT_EQ("v2", Get(1, "foo"));
    ASSERT_OK(Delete(1, "foo"));
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
S
Sanjay Ghemawat 已提交
1793
  } while (ChangeOptions());
J
jorlow@chromium.org 已提交
1794 1795
}

I
Igor Sugak 已提交
1796
TEST_F(DBTest, GetFromImmutableLayer) {
S
Sanjay Ghemawat 已提交
1797
  do {
1798
    Options options;
S
Sanjay Ghemawat 已提交
1799 1800
    options.env = env_;
    options.write_buffer_size = 100000;  // Small write buffer
1801
    options = CurrentOptions(options);
L
Lei Jin 已提交
1802
    CreateAndReopenWithCF({"pikachu"}, options);
1803

1804 1805
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_EQ("v1", Get(1, "foo"));
1806

I
Igor Canadi 已提交
1807 1808
    // Block sync calls
    env_->delay_sstable_sync_.store(true, std::memory_order_release);
1809 1810 1811 1812
    Put(1, "k1", std::string(100000, 'x'));          // Fill memtable
    Put(1, "k2", std::string(100000, 'y'));          // Trigger flush
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("NOT_FOUND", Get(0, "foo"));
I
Igor Canadi 已提交
1813 1814
    // Release sync calls
    env_->delay_sstable_sync_.store(false, std::memory_order_release);
S
Sanjay Ghemawat 已提交
1815
  } while (ChangeOptions());
1816 1817
}

I
Igor Sugak 已提交
1818
TEST_F(DBTest, GetFromVersions) {
S
Sanjay Ghemawat 已提交
1819
  do {
L
Lei Jin 已提交
1820
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
1821 1822 1823 1824
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_OK(Flush(1));
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("NOT_FOUND", Get(0, "foo"));
S
Sanjay Ghemawat 已提交
1825
  } while (ChangeOptions());
1826 1827
}

I
Igor Sugak 已提交
1828
TEST_F(DBTest, GetSnapshot) {
1829 1830
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
S
Sanjay Ghemawat 已提交
1831
  do {
1832
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
S
Sanjay Ghemawat 已提交
1833 1834 1835
    // Try with both a short key and a long key
    for (int i = 0; i < 2; i++) {
      std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
1836
      ASSERT_OK(Put(1, key, "v1"));
S
Sanjay Ghemawat 已提交
1837
      const Snapshot* s1 = db_->GetSnapshot();
S
sdong 已提交
1838 1839 1840 1841 1842
      if (option_config_ == kHashCuckoo) {
        // NOt supported case.
        ASSERT_TRUE(s1 == nullptr);
        break;
      }
1843 1844 1845 1846 1847 1848
      ASSERT_OK(Put(1, key, "v2"));
      ASSERT_EQ("v2", Get(1, key));
      ASSERT_EQ("v1", Get(1, key, s1));
      ASSERT_OK(Flush(1));
      ASSERT_EQ("v2", Get(1, key));
      ASSERT_EQ("v1", Get(1, key, s1));
S
Sanjay Ghemawat 已提交
1849 1850
      db_->ReleaseSnapshot(s1);
    }
S
sdong 已提交
1851
  } while (ChangeOptions());
1852 1853
}

I
Igor Sugak 已提交
1854
TEST_F(DBTest, GetSnapshotLink) {
1855 1856 1857 1858 1859 1860
  do {
    Options options;
    const std::string snapshot_name = test::TmpDir(env_) + "/snapshot";
    DB* snapshotDB;
    ReadOptions roptions;
    std::string result;
1861
    Checkpoint* checkpoint;
1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876

    options = CurrentOptions(options);
    delete db_;
    db_ = nullptr;
    ASSERT_OK(DestroyDB(dbname_, options));
    ASSERT_OK(DestroyDB(snapshot_name, options));
    env_->DeleteDir(snapshot_name);

    // Create a database
    Status s;
    options.create_if_missing = true;
    ASSERT_OK(DB::Open(options, dbname_, &db_));
    std::string key = std::string("foo");
    ASSERT_OK(Put(key, "v1"));
    // Take a snapshot
1877 1878
    ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
    ASSERT_OK(checkpoint->CreateCheckpoint(snapshot_name));
1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903
    ASSERT_OK(Put(key, "v2"));
    ASSERT_EQ("v2", Get(key));
    ASSERT_OK(Flush());
    ASSERT_EQ("v2", Get(key));
    // Open snapshot and verify contents while DB is running
    options.create_if_missing = false;
    ASSERT_OK(DB::Open(options, snapshot_name, &snapshotDB));
    ASSERT_OK(snapshotDB->Get(roptions, key, &result));
    ASSERT_EQ("v1", result);
    delete snapshotDB;
    snapshotDB = nullptr;
    delete db_;
    db_ = nullptr;

    // Destroy original DB
    ASSERT_OK(DestroyDB(dbname_, options));

    // Open snapshot and verify contents
    options.create_if_missing = false;
    dbname_ = snapshot_name;
    ASSERT_OK(DB::Open(options, dbname_, &db_));
    ASSERT_EQ("v1", Get(key));
    delete db_;
    db_ = nullptr;
    ASSERT_OK(DestroyDB(dbname_, options));
1904
    delete checkpoint;
1905 1906 1907 1908 1909 1910

    // Restore DB name
    dbname_ = test::TmpDir(env_) + "/db_test";
  } while (ChangeOptions());
}

I
Igor Sugak 已提交
1911
TEST_F(DBTest, GetLevel0Ordering) {
S
Sanjay Ghemawat 已提交
1912
  do {
L
Lei Jin 已提交
1913
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
S
Sanjay Ghemawat 已提交
1914 1915 1916 1917
    // Check that we process level-0 files in correct order.  The code
    // below generates two level-0 files where the earlier one comes
    // before the later one in the level-0 file list since the earlier
    // one has a smaller "smallest" key.
1918 1919 1920 1921 1922 1923
    ASSERT_OK(Put(1, "bar", "b"));
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(Put(1, "foo", "v2"));
    ASSERT_OK(Flush(1));
    ASSERT_EQ("v2", Get(1, "foo"));
S
Sanjay Ghemawat 已提交
1924
  } while (ChangeOptions());
1925 1926
}

I
Igor Sugak 已提交
1927
TEST_F(DBTest, WrongLevel0Config) {
1928 1929 1930 1931 1932 1933 1934 1935 1936
  Options options = CurrentOptions();
  Close();
  ASSERT_OK(DestroyDB(dbname_, options));
  options.level0_stop_writes_trigger = 1;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_file_num_compaction_trigger = 3;
  ASSERT_OK(DB::Open(options, dbname_, &db_));
}

I
Igor Sugak 已提交
1937
TEST_F(DBTest, GetOrderedByLevels) {
S
Sanjay Ghemawat 已提交
1938
  do {
L
Lei Jin 已提交
1939
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
1940 1941 1942 1943 1944 1945 1946
    ASSERT_OK(Put(1, "foo", "v1"));
    Compact(1, "a", "z");
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_OK(Put(1, "foo", "v2"));
    ASSERT_EQ("v2", Get(1, "foo"));
    ASSERT_OK(Flush(1));
    ASSERT_EQ("v2", Get(1, "foo"));
S
Sanjay Ghemawat 已提交
1947
  } while (ChangeOptions());
1948 1949
}

I
Igor Sugak 已提交
1950
TEST_F(DBTest, GetPicksCorrectFile) {
S
Sanjay Ghemawat 已提交
1951
  do {
L
Lei Jin 已提交
1952
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
S
Sanjay Ghemawat 已提交
1953
    // Arrange to have multiple files in a non-level-0 level.
1954 1955 1956 1957 1958 1959 1960 1961 1962
    ASSERT_OK(Put(1, "a", "va"));
    Compact(1, "a", "b");
    ASSERT_OK(Put(1, "x", "vx"));
    Compact(1, "x", "y");
    ASSERT_OK(Put(1, "f", "vf"));
    Compact(1, "f", "g");
    ASSERT_EQ("va", Get(1, "a"));
    ASSERT_EQ("vf", Get(1, "f"));
    ASSERT_EQ("vx", Get(1, "x"));
S
Sanjay Ghemawat 已提交
1963
  } while (ChangeOptions());
1964 1965
}

I
Igor Sugak 已提交
1966
TEST_F(DBTest, GetEncountersEmptyLevel) {
S
Sanjay Ghemawat 已提交
1967
  do {
I
Igor Canadi 已提交
1968 1969 1970
    Options options = CurrentOptions();
    options.max_background_flushes = 0;
    options.disableDataSync = true;
L
Lei Jin 已提交
1971
    CreateAndReopenWithCF({"pikachu"}, options);
S
Sanjay Ghemawat 已提交
1972 1973 1974 1975 1976 1977
    // Arrange for the following to happen:
    //   * sstable A in level 0
    //   * nothing in level 1
    //   * sstable B in level 2
    // Then do enough Get() calls to arrange for an automatic compaction
    // of sstable A.  A bug would cause the compaction to be marked as
C
clark.kang 已提交
1978
    // occurring at level 1 (instead of the correct level 0).
S
Sanjay Ghemawat 已提交
1979 1980 1981

    // Step 1: First place sstables in levels 0 and 2
    int compaction_count = 0;
1982
    while (NumTableFilesAtLevel(0, 1) == 0 || NumTableFilesAtLevel(2, 1) == 0) {
S
Sanjay Ghemawat 已提交
1983 1984
      ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
      compaction_count++;
1985 1986 1987
      Put(1, "a", "begin");
      Put(1, "z", "end");
      ASSERT_OK(Flush(1));
S
Sanjay Ghemawat 已提交
1988
    }
1989

S
Sanjay Ghemawat 已提交
1990
    // Step 2: clear level 1 if necessary.
1991 1992 1993 1994
    dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1);
    ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
    ASSERT_EQ(NumTableFilesAtLevel(2, 1), 1);
S
Sanjay Ghemawat 已提交
1995

H
heyongqiang 已提交
1996 1997
    // Step 3: read a bunch of times
    for (int i = 0; i < 1000; i++) {
1998
      ASSERT_EQ("NOT_FOUND", Get(1, "missing"));
S
Sanjay Ghemawat 已提交
1999
    }
H
heyongqiang 已提交
2000 2001 2002 2003

    // Step 4: Wait for compaction to finish
    env_->SleepForMicroseconds(1000000);

2004
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1);  // XXX
I
Igor Canadi 已提交
2005
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
2006 2007
}

2008 2009 2010
// KeyMayExist can lead to a few false positives, but not false negatives.
// To make test deterministic, use a much larger number of bits per key-20 than
// bits in the key, so that false positives are eliminated
I
Igor Sugak 已提交
2011
TEST_F(DBTest, KeyMayExist) {
2012
  do {
2013 2014
    ReadOptions ropts;
    std::string value;
2015 2016 2017
    anon::OptionsOverride options_override;
    options_override.filter_policy.reset(NewBloomFilterPolicy(20));
    Options options = CurrentOptions(options_override);
2018
    options.statistics = rocksdb::CreateDBStatistics();
L
Lei Jin 已提交
2019
    CreateAndReopenWithCF({"pikachu"}, options);
2020

2021
    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
2022

2023
    ASSERT_OK(Put(1, "a", "b"));
2024
    bool value_found = false;
2025 2026
    ASSERT_TRUE(
        db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
2027 2028
    ASSERT_TRUE(value_found);
    ASSERT_EQ("b", value);
2029

2030
    ASSERT_OK(Flush(1));
2031
    value.clear();
2032

I
Igor Canadi 已提交
2033 2034
    long numopen = TestGetTickerCount(options, NO_FILE_OPENS);
    long cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
2035 2036
    ASSERT_TRUE(
        db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
M
Mayank Agarwal 已提交
2037
    ASSERT_TRUE(!value_found);
2038 2039
    // assert that no new files were opened and no new blocks were
    // read into block cache.
I
Igor Canadi 已提交
2040 2041
    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
2042

2043
    ASSERT_OK(Delete(1, "a"));
2044

I
Igor Canadi 已提交
2045 2046
    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
2047
    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
I
Igor Canadi 已提交
2048 2049
    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
2050

2051
    ASSERT_OK(Flush(1));
2052 2053
    dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1],
                                true /* disallow trivial move */);
2054

I
Igor Canadi 已提交
2055 2056
    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
2057
    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
I
Igor Canadi 已提交
2058 2059
    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
2060

2061
    ASSERT_OK(Delete(1, "c"));
2062

I
Igor Canadi 已提交
2063 2064
    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
2065
    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "c", &value));
I
Igor Canadi 已提交
2066 2067
    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
2068

2069 2070
    // KeyMayExist function only checks data in block caches, which is not used
    // by plain table format.
I
Igor Canadi 已提交
2071 2072
  } while (
      ChangeOptions(kSkipPlainTable | kSkipHashIndex | kSkipFIFOCompaction));
2073 2074
}

I
Igor Sugak 已提交
2075
TEST_F(DBTest, NonBlockingIteration) {
2076 2077 2078
  do {
    ReadOptions non_blocking_opts, regular_opts;
    Options options = CurrentOptions();
2079
    options.statistics = rocksdb::CreateDBStatistics();
2080
    non_blocking_opts.read_tier = kBlockCacheTier;
L
Lei Jin 已提交
2081
    CreateAndReopenWithCF({"pikachu"}, options);
2082
    // write one kv to the database.
2083
    ASSERT_OK(Put(1, "a", "b"));
2084 2085 2086

    // scan using non-blocking iterator. We should find it because
    // it is in memtable.
2087
    Iterator* iter = db_->NewIterator(non_blocking_opts, handles_[1]);
2088 2089
    int count = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
N
Naman Gupta 已提交
2090
      ASSERT_OK(iter->status());
2091 2092 2093 2094 2095 2096 2097
      count++;
    }
    ASSERT_EQ(count, 1);
    delete iter;

    // flush memtable to storage. Now, the key should not be in the
    // memtable neither in the block cache.
2098
    ASSERT_OK(Flush(1));
2099 2100 2101

    // verify that a non-blocking iterator does not find any
    // kvs. Neither does it do any IOs to storage.
I
Igor Canadi 已提交
2102 2103
    long numopen = TestGetTickerCount(options, NO_FILE_OPENS);
    long cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
2104
    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
2105 2106 2107 2108 2109 2110
    count = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      count++;
    }
    ASSERT_EQ(count, 0);
    ASSERT_TRUE(iter->status().IsIncomplete());
I
Igor Canadi 已提交
2111 2112
    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
2113 2114 2115
    delete iter;

    // read in the specified block via a regular get
2116
    ASSERT_EQ(Get(1, "a"), "b");
2117 2118

    // verify that we can find it via a non-blocking scan
I
Igor Canadi 已提交
2119 2120
    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
2121
    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
2122 2123
    count = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
N
Naman Gupta 已提交
2124
      ASSERT_OK(iter->status());
2125 2126 2127
      count++;
    }
    ASSERT_EQ(count, 1);
I
Igor Canadi 已提交
2128 2129
    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
2130 2131
    delete iter;

2132 2133
    // This test verifies block cache behaviors, which is not used by plain
    // table format.
2134
    // Exclude kHashCuckoo as it does not support iteration currently
I
Igor Canadi 已提交
2135 2136
  } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipHashCuckoo |
                         kSkipMmapReads));
2137 2138
}

I
Igor Sugak 已提交
2139
TEST_F(DBTest, ManagedNonBlockingIteration) {
V
Venkatesh Radhakrishnan 已提交
2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203
  do {
    ReadOptions non_blocking_opts, regular_opts;
    Options options = CurrentOptions();
    options.statistics = rocksdb::CreateDBStatistics();
    non_blocking_opts.read_tier = kBlockCacheTier;
    non_blocking_opts.managed = true;
    CreateAndReopenWithCF({"pikachu"}, options);
    // write one kv to the database.
    ASSERT_OK(Put(1, "a", "b"));

    // scan using non-blocking iterator. We should find it because
    // it is in memtable.
    Iterator* iter = db_->NewIterator(non_blocking_opts, handles_[1]);
    int count = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ASSERT_OK(iter->status());
      count++;
    }
    ASSERT_EQ(count, 1);
    delete iter;

    // flush memtable to storage. Now, the key should not be in the
    // memtable neither in the block cache.
    ASSERT_OK(Flush(1));

    // verify that a non-blocking iterator does not find any
    // kvs. Neither does it do any IOs to storage.
    int64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
    int64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
    count = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      count++;
    }
    ASSERT_EQ(count, 0);
    ASSERT_TRUE(iter->status().IsIncomplete());
    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
    delete iter;

    // read in the specified block via a regular get
    ASSERT_EQ(Get(1, "a"), "b");

    // verify that we can find it via a non-blocking scan
    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
    count = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ASSERT_OK(iter->status());
      count++;
    }
    ASSERT_EQ(count, 1);
    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
    delete iter;

    // This test verifies block cache behaviors, which is not used by plain
    // table format.
    // Exclude kHashCuckoo as it does not support iteration currently
  } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipHashCuckoo |
                         kSkipMmapReads));
}

2204 2205
// A delete is skipped for key if KeyMayExist(key) returns False
// Tests Writebatch consistency and proper delete behaviour
I
Igor Sugak 已提交
2206
TEST_F(DBTest, FilterDeletes) {
2207
  do {
2208 2209 2210
    anon::OptionsOverride options_override;
    options_override.filter_policy.reset(NewBloomFilterPolicy(20));
    Options options = CurrentOptions(options_override);
2211
    options.filter_deletes = true;
L
Lei Jin 已提交
2212
    CreateAndReopenWithCF({"pikachu"}, options);
2213 2214
    WriteBatch batch;

I
Igor Canadi 已提交
2215
    batch.Delete(handles_[1], "a");
2216
    dbfull()->Write(WriteOptions(), &batch);
2217
    ASSERT_EQ(AllEntriesFor("a", 1), "[ ]");  // Delete skipped
2218 2219
    batch.Clear();

I
Igor Canadi 已提交
2220 2221
    batch.Put(handles_[1], "a", "b");
    batch.Delete(handles_[1], "a");
2222
    dbfull()->Write(WriteOptions(), &batch);
2223 2224
    ASSERT_EQ(Get(1, "a"), "NOT_FOUND");
    ASSERT_EQ(AllEntriesFor("a", 1), "[ DEL, b ]");  // Delete issued
2225 2226
    batch.Clear();

I
Igor Canadi 已提交
2227 2228
    batch.Delete(handles_[1], "c");
    batch.Put(handles_[1], "c", "d");
2229
    dbfull()->Write(WriteOptions(), &batch);
2230 2231
    ASSERT_EQ(Get(1, "c"), "d");
    ASSERT_EQ(AllEntriesFor("c", 1), "[ d ]");  // Delete skipped
2232 2233
    batch.Clear();

2234
    ASSERT_OK(Flush(1));  // A stray Flush
2235

I
Igor Canadi 已提交
2236
    batch.Delete(handles_[1], "c");
2237
    dbfull()->Write(WriteOptions(), &batch);
2238
    ASSERT_EQ(AllEntriesFor("c", 1), "[ DEL, d ]");  // Delete issued
2239 2240
    batch.Clear();
  } while (ChangeCompactOptions());
2241 2242
}

I
Igor Sugak 已提交
2243
TEST_F(DBTest, GetFilterByPrefixBloom) {
2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277
  Options options = last_options_;
  options.prefix_extractor.reset(NewFixedPrefixTransform(8));
  options.statistics = rocksdb::CreateDBStatistics();
  BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
  bbto.whole_key_filtering = false;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyAndReopen(options);

  WriteOptions wo;
  ReadOptions ro;
  FlushOptions fo;
  fo.wait = true;
  std::string value;

  ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo"));
  ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2"));
  ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar"));

  dbfull()->Flush(fo);

  ASSERT_EQ("foo", Get("barbarbar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
  ASSERT_EQ("foo2", Get("barbarbar2"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
  ASSERT_EQ("NOT_FOUND", Get("barbarbar3"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);

  ASSERT_EQ("NOT_FOUND", Get("barfoofoo"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);

  ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
}
I
Igor Canadi 已提交
2278

I
Igor Sugak 已提交
2279
TEST_F(DBTest, WholeKeyFilterProp) {
2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427
  Options options = last_options_;
  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  options.statistics = rocksdb::CreateDBStatistics();

  BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
  bbto.whole_key_filtering = false;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyAndReopen(options);

  WriteOptions wo;
  ReadOptions ro;
  FlushOptions fo;
  fo.wait = true;
  std::string value;

  ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
  // Needs insert some keys to make sure files are not filtered out by key
  // ranges.
  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
  dbfull()->Flush(fo);

  Reopen(options);
  ASSERT_EQ("NOT_FOUND", Get("foo"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
  ASSERT_EQ("NOT_FOUND", Get("bar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
  ASSERT_EQ("foo", Get("foobar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);

  // Reopen with whole key filtering enabled and prefix extractor
  // NULL. Bloom filter should be off for both of whole key and
  // prefix bloom.
  bbto.whole_key_filtering = true;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  options.prefix_extractor.reset();
  Reopen(options);

  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
  ASSERT_EQ("NOT_FOUND", Get("foo"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
  ASSERT_EQ("NOT_FOUND", Get("bar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
  ASSERT_EQ("foo", Get("foobar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
  // Write DB with only full key filtering.
  ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
  // Needs insert some keys to make sure files are not filtered out by key
  // ranges.
  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
  db_->CompactRange(nullptr, nullptr);

  // Reopen with both of whole key off and prefix extractor enabled.
  // Still no bloom filter should be used.
  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  bbto.whole_key_filtering = false;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  Reopen(options);

  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
  ASSERT_EQ("NOT_FOUND", Get("foo"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
  ASSERT_EQ("NOT_FOUND", Get("bar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
  ASSERT_EQ("foo", Get("foobar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);

  // Try to create a DB with mixed files:
  ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
  // Needs insert some keys to make sure files are not filtered out by key
  // ranges.
  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
  db_->CompactRange(nullptr, nullptr);

  options.prefix_extractor.reset();
  bbto.whole_key_filtering = true;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  Reopen(options);

  // Try to create a DB with mixed files.
  ASSERT_OK(dbfull()->Put(wo, "barfoo", "bar"));
  // In this case needs insert some keys to make sure files are
  // not filtered out by key ranges.
  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
  Flush();

  // Now we have two files:
  // File 1: An older file with prefix bloom.
  // File 2: A newer file with whole bloom filter.
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
  ASSERT_EQ("NOT_FOUND", Get("foo"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
  ASSERT_EQ("NOT_FOUND", Get("bar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
  ASSERT_EQ("foo", Get("foobar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
  ASSERT_EQ("bar", Get("barfoo"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);

  // Reopen with the same setting: only whole key is used
  Reopen(options);
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
  ASSERT_EQ("NOT_FOUND", Get("foo"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5);
  ASSERT_EQ("NOT_FOUND", Get("bar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6);
  ASSERT_EQ("foo", Get("foobar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
  ASSERT_EQ("bar", Get("barfoo"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);

  // Restart with both filters are allowed
  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  bbto.whole_key_filtering = true;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  Reopen(options);
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
  // File 1 will has it filtered out.
  // File 2 will not, as prefix `foo` exists in the file.
  ASSERT_EQ("NOT_FOUND", Get("foo"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8);
  ASSERT_EQ("NOT_FOUND", Get("bar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10);
  ASSERT_EQ("foo", Get("foobar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
  ASSERT_EQ("bar", Get("barfoo"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);

  // Restart with only prefix bloom is allowed.
  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  bbto.whole_key_filtering = false;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  Reopen(options);
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
  ASSERT_EQ("NOT_FOUND", Get("foo"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
  ASSERT_EQ("NOT_FOUND", Get("bar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
  ASSERT_EQ("foo", Get("foobar"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
  ASSERT_EQ("bar", Get("barfoo"));
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
}

I
Igor Sugak 已提交
2428
TEST_F(DBTest, IterSeekBeforePrev) {
I
Igor Canadi 已提交
2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  dbfull()->Flush(FlushOptions());
  ASSERT_OK(Put("0", "f"));
  ASSERT_OK(Put("1", "h"));
  dbfull()->Flush(FlushOptions());
  ASSERT_OK(Put("2", "j"));
  auto iter = db_->NewIterator(ReadOptions());
  iter->Seek(Slice("c"));
  iter->Prev();
  iter->Seek(Slice("a"));
  iter->Prev();
  delete iter;
}

I
Igor Canadi 已提交
2444
namespace {
2445 2446 2447
std::string MakeLongKey(size_t length, char c) {
  return std::string(length, c);
}
I
Igor Canadi 已提交
2448
}  // namespace
2449

I
Igor Sugak 已提交
2450
TEST_F(DBTest, IterLongKeys) {
2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482
  ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
  ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
  ASSERT_OK(Put("a", "b"));
  dbfull()->Flush(FlushOptions());
  ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
  ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
  ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
  auto iter = db_->NewIterator(ReadOptions());

  // Create a key that needs to be skipped for Seq too new
  iter->Seek(MakeLongKey(20, 0));
  ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4");
  delete iter;

  iter = db_->NewIterator(ReadOptions());
  iter->Seek(MakeLongKey(50, 1));
  ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
  delete iter;
}

I
Igor Sugak 已提交
2483
TEST_F(DBTest, IterNextWithNewerSeq) {
S
sdong 已提交
2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503
  ASSERT_OK(Put("0", "0"));
  dbfull()->Flush(FlushOptions());
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  ASSERT_OK(Put("d", "e"));
  auto iter = db_->NewIterator(ReadOptions());

  // Create a key that needs to be skipped for Seq too new
  for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
       i++) {
    ASSERT_OK(Put("b", "f"));
  }

  iter->Seek(Slice("a"));
  ASSERT_EQ(IterStatus(iter), "a->b");
  iter->Next();
  ASSERT_EQ(IterStatus(iter), "c->d");
  delete iter;
}

I
Igor Sugak 已提交
2504
TEST_F(DBTest, IterPrevWithNewerSeq) {
S
sdong 已提交
2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528
  ASSERT_OK(Put("0", "0"));
  dbfull()->Flush(FlushOptions());
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  ASSERT_OK(Put("d", "e"));
  auto iter = db_->NewIterator(ReadOptions());

  // Create a key that needs to be skipped for Seq too new
  for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
       i++) {
    ASSERT_OK(Put("b", "f"));
  }

  iter->Seek(Slice("d"));
  ASSERT_EQ(IterStatus(iter), "d->e");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "c->d");
  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "a->b");

  iter->Prev();
  delete iter;
}

I
Igor Sugak 已提交
2529
TEST_F(DBTest, IterPrevWithNewerSeq2) {
S
sdong 已提交
2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551
  ASSERT_OK(Put("0", "0"));
  dbfull()->Flush(FlushOptions());
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  ASSERT_OK(Put("d", "e"));
  auto iter = db_->NewIterator(ReadOptions());
  iter->Seek(Slice("c"));
  ASSERT_EQ(IterStatus(iter), "c->d");

  // Create a key that needs to be skipped for Seq too new
  for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
      i++) {
    ASSERT_OK(Put("b", "f"));
  }

  iter->Prev();
  ASSERT_EQ(IterStatus(iter), "a->b");

  iter->Prev();
  delete iter;
}

I
Igor Sugak 已提交
2552
TEST_F(DBTest, IterEmpty) {
2553
  do {
L
Lei Jin 已提交
2554
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
2555
    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
J
jorlow@chromium.org 已提交
2556

2557 2558
    iter->SeekToFirst();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
J
jorlow@chromium.org 已提交
2559

2560 2561
    iter->SeekToLast();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
J
jorlow@chromium.org 已提交
2562

2563 2564
    iter->Seek("foo");
    ASSERT_EQ(IterStatus(iter), "(invalid)");
J
jorlow@chromium.org 已提交
2565

2566 2567
    delete iter;
  } while (ChangeCompactOptions());
J
jorlow@chromium.org 已提交
2568 2569
}

I
Igor Sugak 已提交
2570
TEST_F(DBTest, IterSingle) {
2571
  do {
L
Lei Jin 已提交
2572
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
2573 2574
    ASSERT_OK(Put(1, "a", "va"));
    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
J
jorlow@chromium.org 已提交
2575

2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597
    iter->SeekToFirst();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
    iter->SeekToFirst();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    iter->SeekToLast();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
    iter->SeekToLast();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    iter->Seek("");
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
J
jorlow@chromium.org 已提交
2598

2599 2600 2601 2602 2603 2604 2605 2606 2607 2608
    iter->Seek("a");
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    iter->Seek("b");
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    delete iter;
  } while (ChangeCompactOptions());
J
jorlow@chromium.org 已提交
2609 2610
}

I
Igor Sugak 已提交
2611
TEST_F(DBTest, IterMulti) {
2612
  do {
L
Lei Jin 已提交
2613
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
2614 2615 2616 2617
    ASSERT_OK(Put(1, "a", "va"));
    ASSERT_OK(Put(1, "b", "vb"));
    ASSERT_OK(Put(1, "c", "vc"));
    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
J
jorlow@chromium.org 已提交
2618

2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630
    iter->SeekToFirst();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "b->vb");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "c->vc");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
    iter->SeekToFirst();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
J
jorlow@chromium.org 已提交
2631

2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650
    iter->SeekToLast();
    ASSERT_EQ(IterStatus(iter), "c->vc");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "b->vb");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
    iter->SeekToLast();
    ASSERT_EQ(IterStatus(iter), "c->vc");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    iter->Seek("");
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Seek("a");
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Seek("ax");
    ASSERT_EQ(IterStatus(iter), "b->vb");
2651

2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671
    iter->Seek("b");
    ASSERT_EQ(IterStatus(iter), "b->vb");
    iter->Seek("z");
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    // Switch from reverse to forward
    iter->SeekToLast();
    iter->Prev();
    iter->Prev();
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "b->vb");

    // Switch from forward to reverse
    iter->SeekToFirst();
    iter->Next();
    iter->Next();
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "b->vb");

    // Make sure iter stays at snapshot
2672 2673 2674 2675 2676
    ASSERT_OK(Put(1, "a", "va2"));
    ASSERT_OK(Put(1, "a2", "va3"));
    ASSERT_OK(Put(1, "b", "vb2"));
    ASSERT_OK(Put(1, "c", "vc2"));
    ASSERT_OK(Delete(1, "b"));
2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695
    iter->SeekToFirst();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "b->vb");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "c->vc");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
    iter->SeekToLast();
    ASSERT_EQ(IterStatus(iter), "c->vc");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "b->vb");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    delete iter;
  } while (ChangeCompactOptions());
J
jorlow@chromium.org 已提交
2696 2697
}

2698 2699
// Check that we can skip over a run of user keys
// by using reseek rather than sequential scan
I
Igor Sugak 已提交
2700
TEST_F(DBTest, IterReseek) {
2701 2702 2703
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  Options options = CurrentOptions(options_override);
2704 2705
  options.max_sequential_skip_in_iterations = 3;
  options.create_if_missing = true;
2706
  options.statistics = rocksdb::CreateDBStatistics();
L
Lei Jin 已提交
2707
  DestroyAndReopen(options);
L
Lei Jin 已提交
2708
  CreateAndReopenWithCF({"pikachu"}, options);
2709 2710 2711 2712

  // insert two keys with same userkey and verify that
  // reseek is not invoked. For each of these test cases,
  // verify that we can find the next key "b".
2713 2714 2715 2716
  ASSERT_OK(Put(1, "a", "one"));
  ASSERT_OK(Put(1, "a", "two"));
  ASSERT_OK(Put(1, "b", "bone"));
  Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
2717
  iter->SeekToFirst();
I
Igor Canadi 已提交
2718
  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
2719 2720
  ASSERT_EQ(IterStatus(iter), "a->two");
  iter->Next();
I
Igor Canadi 已提交
2721
  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
2722 2723 2724 2725 2726
  ASSERT_EQ(IterStatus(iter), "b->bone");
  delete iter;

  // insert a total of three keys with same userkey and verify
  // that reseek is still not invoked.
2727 2728
  ASSERT_OK(Put(1, "a", "three"));
  iter = db_->NewIterator(ReadOptions(), handles_[1]);
2729 2730 2731
  iter->SeekToFirst();
  ASSERT_EQ(IterStatus(iter), "a->three");
  iter->Next();
I
Igor Canadi 已提交
2732
  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
2733 2734 2735 2736 2737
  ASSERT_EQ(IterStatus(iter), "b->bone");
  delete iter;

  // insert a total of four keys with same userkey and verify
  // that reseek is invoked.
2738 2739
  ASSERT_OK(Put(1, "a", "four"));
  iter = db_->NewIterator(ReadOptions(), handles_[1]);
2740 2741
  iter->SeekToFirst();
  ASSERT_EQ(IterStatus(iter), "a->four");
I
Igor Canadi 已提交
2742
  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
2743
  iter->Next();
I
Igor Canadi 已提交
2744
  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
2745 2746 2747 2748 2749 2750
  ASSERT_EQ(IterStatus(iter), "b->bone");
  delete iter;

  // Testing reverse iterator
  // At this point, we have three versions of "a" and one version of "b".
  // The reseek statistics is already at 1.
I
Igor Canadi 已提交
2751 2752
  int num_reseeks =
      (int)TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION);
2753 2754

  // Insert another version of b and assert that reseek is not invoked
2755 2756
  ASSERT_OK(Put(1, "b", "btwo"));
  iter = db_->NewIterator(ReadOptions(), handles_[1]);
2757 2758
  iter->SeekToLast();
  ASSERT_EQ(IterStatus(iter), "b->btwo");
I
Igor Canadi 已提交
2759 2760
  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
            num_reseeks);
2761
  iter->Prev();
I
Igor Canadi 已提交
2762 2763
  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
            num_reseeks + 1);
2764 2765 2766 2767 2768
  ASSERT_EQ(IterStatus(iter), "a->four");
  delete iter;

  // insert two more versions of b. This makes a total of 4 versions
  // of b and 4 versions of a.
2769 2770 2771
  ASSERT_OK(Put(1, "b", "bthree"));
  ASSERT_OK(Put(1, "b", "bfour"));
  iter = db_->NewIterator(ReadOptions(), handles_[1]);
2772 2773
  iter->SeekToLast();
  ASSERT_EQ(IterStatus(iter), "b->bfour");
I
Igor Canadi 已提交
2774 2775
  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
            num_reseeks + 2);
2776 2777 2778
  iter->Prev();

  // the previous Prev call should have invoked reseek
I
Igor Canadi 已提交
2779 2780
  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
            num_reseeks + 3);
2781 2782 2783 2784
  ASSERT_EQ(IterStatus(iter), "a->four");
  delete iter;
}

I
Igor Sugak 已提交
2785
TEST_F(DBTest, IterSmallAndLargeMix) {
2786
  do {
L
Lei Jin 已提交
2787
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
2788 2789 2790 2791 2792
    ASSERT_OK(Put(1, "a", "va"));
    ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
    ASSERT_OK(Put(1, "c", "vc"));
    ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
    ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
J
jorlow@chromium.org 已提交
2793

2794
    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
J
jorlow@chromium.org 已提交
2795

2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807
    iter->SeekToFirst();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "c->vc");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
J
jorlow@chromium.org 已提交
2808

2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823
    iter->SeekToLast();
    ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "c->vc");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    delete iter;
  } while (ChangeCompactOptions());
J
jorlow@chromium.org 已提交
2824 2825
}

I
Igor Sugak 已提交
2826
TEST_F(DBTest, IterMultiWithDelete) {
S
Sanjay Ghemawat 已提交
2827
  do {
L
Lei Jin 已提交
2828
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
2829 2830 2831 2832 2833
    ASSERT_OK(Put(1, "ka", "va"));
    ASSERT_OK(Put(1, "kb", "vb"));
    ASSERT_OK(Put(1, "kc", "vc"));
    ASSERT_OK(Delete(1, "kb"));
    ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
2834 2835

    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
2836 2837
    iter->Seek("kc");
    ASSERT_EQ(IterStatus(iter), "kc->vc");
2838 2839
    if (!CurrentOptions().merge_operator) {
      // TODO: merge operator does not support backward iteration yet
2840 2841 2842 2843 2844 2845
      if (kPlainTableAllBytesPrefix != option_config_&&
          kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
          kHashLinkList != option_config_) {
        iter->Prev();
        ASSERT_EQ(IterStatus(iter), "ka->va");
      }
2846
    }
S
Sanjay Ghemawat 已提交
2847 2848
    delete iter;
  } while (ChangeOptions());
2849 2850
}

I
Igor Sugak 已提交
2851
TEST_F(DBTest, IterPrevMaxSkip) {
2852
  do {
L
Lei Jin 已提交
2853
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
2854
    for (int i = 0; i < 2; i++) {
2855 2856 2857 2858 2859
      ASSERT_OK(Put(1, "key1", "v1"));
      ASSERT_OK(Put(1, "key2", "v2"));
      ASSERT_OK(Put(1, "key3", "v3"));
      ASSERT_OK(Put(1, "key4", "v4"));
      ASSERT_OK(Put(1, "key5", "v5"));
2860 2861
    }

2862
    VerifyIterLast("key5->v5", 1);
2863

2864 2865
    ASSERT_OK(Delete(1, "key5"));
    VerifyIterLast("key4->v4", 1);
2866

2867 2868
    ASSERT_OK(Delete(1, "key4"));
    VerifyIterLast("key3->v3", 1);
2869

2870 2871
    ASSERT_OK(Delete(1, "key3"));
    VerifyIterLast("key2->v2", 1);
2872

2873 2874
    ASSERT_OK(Delete(1, "key2"));
    VerifyIterLast("key1->v1", 1);
2875

2876 2877
    ASSERT_OK(Delete(1, "key1"));
    VerifyIterLast("(invalid)", 1);
2878
  } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast));
2879 2880
}

I
Igor Sugak 已提交
2881
TEST_F(DBTest, IterWithSnapshot) {
2882 2883
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
2884
  do {
2885
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
2886 2887 2888 2889 2890
    ASSERT_OK(Put(1, "key1", "val1"));
    ASSERT_OK(Put(1, "key2", "val2"));
    ASSERT_OK(Put(1, "key3", "val3"));
    ASSERT_OK(Put(1, "key4", "val4"));
    ASSERT_OK(Put(1, "key5", "val5"));
2891 2892 2893 2894

    const Snapshot *snapshot = db_->GetSnapshot();
    ReadOptions options;
    options.snapshot = snapshot;
2895
    Iterator* iter = db_->NewIterator(options, handles_[1]);
2896 2897

    // Put more values after the snapshot
2898 2899
    ASSERT_OK(Put(1, "key100", "val100"));
    ASSERT_OK(Put(1, "key101", "val101"));
2900 2901 2902 2903 2904

    iter->Seek("key5");
    ASSERT_EQ(IterStatus(iter), "key5->val5");
    if (!CurrentOptions().merge_operator) {
      // TODO: merge operator does not support backward iteration yet
2905 2906 2907 2908 2909 2910 2911
      if (kPlainTableAllBytesPrefix != option_config_&&
        kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
        kHashLinkList != option_config_) {
        iter->Prev();
        ASSERT_EQ(IterStatus(iter), "key4->val4");
        iter->Prev();
        ASSERT_EQ(IterStatus(iter), "key3->val3");
2912

2913 2914 2915 2916 2917
        iter->Next();
        ASSERT_EQ(IterStatus(iter), "key4->val4");
        iter->Next();
        ASSERT_EQ(IterStatus(iter), "key5->val5");
      }
2918 2919 2920 2921 2922
      iter->Next();
      ASSERT_TRUE(!iter->Valid());
    }
    db_->ReleaseSnapshot(snapshot);
    delete iter;
2923 2924
    // skip as HashCuckooRep does not support snapshot
  } while (ChangeOptions(kSkipHashCuckoo));
2925 2926
}

I
Igor Sugak 已提交
2927
TEST_F(DBTest, Recover) {
S
Sanjay Ghemawat 已提交
2928
  do {
L
Lei Jin 已提交
2929
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
2930 2931 2932
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_OK(Put(1, "baz", "v5"));

L
Lei Jin 已提交
2933
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
2934 2935 2936 2937 2938 2939 2940
    ASSERT_EQ("v1", Get(1, "foo"));

    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("v5", Get(1, "baz"));
    ASSERT_OK(Put(1, "bar", "v2"));
    ASSERT_OK(Put(1, "foo", "v3"));

L
Lei Jin 已提交
2941
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
2942 2943 2944 2945 2946
    ASSERT_EQ("v3", Get(1, "foo"));
    ASSERT_OK(Put(1, "foo", "v4"));
    ASSERT_EQ("v4", Get(1, "foo"));
    ASSERT_EQ("v2", Get(1, "bar"));
    ASSERT_EQ("v5", Get(1, "baz"));
S
Sanjay Ghemawat 已提交
2947
  } while (ChangeOptions());
J
jorlow@chromium.org 已提交
2948 2949
}

I
Igor Sugak 已提交
2950
TEST_F(DBTest, RecoverWithTableHandle) {
2951
  do {
2952
    Options options;
2953 2954 2955
    options.create_if_missing = true;
    options.write_buffer_size = 100;
    options.disable_auto_compactions = true;
2956
    options = CurrentOptions(options);
L
Lei Jin 已提交
2957
    DestroyAndReopen(options);
L
Lei Jin 已提交
2958
    CreateAndReopenWithCF({"pikachu"}, options);
2959

2960 2961 2962 2963 2964 2965 2966
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_OK(Put(1, "bar", "v2"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(Put(1, "foo", "v3"));
    ASSERT_OK(Put(1, "bar", "v4"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(Put(1, "big", std::string(100, 'a')));
L
Lei Jin 已提交
2967
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
2968 2969

    std::vector<std::vector<FileMetaData>> files;
2970
    dbfull()->TEST_GetFilesMetaData(handles_[1], &files);
2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987
    int total_files = 0;
    for (const auto& level : files) {
      total_files += level.size();
    }
    ASSERT_EQ(total_files, 3);
    for (const auto& level : files) {
      for (const auto& file : level) {
        if (kInfiniteMaxOpenFiles == option_config_) {
          ASSERT_TRUE(file.table_reader_handle != nullptr);
        } else {
          ASSERT_TRUE(file.table_reader_handle == nullptr);
        }
      }
    }
  } while (ChangeOptions());
}

I
Igor Sugak 已提交
2988
TEST_F(DBTest, IgnoreRecoveredLog) {
I
Igor Canadi 已提交
2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004
  std::string backup_logs = dbname_ + "/backup_logs";

  // delete old files in backup_logs directory
  env_->CreateDirIfMissing(backup_logs);
  std::vector<std::string> old_files;
  env_->GetChildren(backup_logs, &old_files);
  for (auto& file : old_files) {
    if (file != "." && file != "..") {
      env_->DeleteFile(backup_logs + "/" + file);
    }
  }

  do {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.merge_operator = MergeOperators::CreateUInt64AddOperator();
3005 3006
    options.wal_dir = dbname_ + "/logs";
    DestroyAndReopen(options);
I
Igor Canadi 已提交
3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025

    // fill up the DB
    std::string one, two;
    PutFixed64(&one, 1);
    PutFixed64(&two, 2);
    ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one)));
    ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one)));
    ASSERT_OK(db_->Merge(WriteOptions(), Slice("bar"), Slice(one)));

    // copy the logs to backup
    std::vector<std::string> logs;
    env_->GetChildren(options.wal_dir, &logs);
    for (auto& log : logs) {
      if (log != ".." && log != ".") {
        CopyFile(options.wal_dir + "/" + log, backup_logs + "/" + log);
      }
    }

    // recover the DB
L
Lei Jin 已提交
3026
    Reopen(options);
I
Igor Canadi 已提交
3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039
    ASSERT_EQ(two, Get("foo"));
    ASSERT_EQ(one, Get("bar"));
    Close();

    // copy the logs from backup back to wal dir
    for (auto& log : logs) {
      if (log != ".." && log != ".") {
        CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log);
      }
    }
    // this should ignore the log files, recovery should not happen again
    // if the recovery happens, the same merge operator would be called twice,
    // leading to incorrect results
L
Lei Jin 已提交
3040
    Reopen(options);
I
Igor Canadi 已提交
3041 3042 3043
    ASSERT_EQ(two, Get("foo"));
    ASSERT_EQ(one, Get("bar"));
    Close();
L
Lei Jin 已提交
3044
    Destroy(options);
L
Lei Jin 已提交
3045
    Reopen(options);
3046
    Close();
I
Igor Canadi 已提交
3047 3048 3049 3050 3051 3052 3053 3054 3055 3056

    // copy the logs from backup back to wal dir
    env_->CreateDirIfMissing(options.wal_dir);
    for (auto& log : logs) {
      if (log != ".." && log != ".") {
        CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log);
      }
    }
    // assert that we successfully recovered only from logs, even though we
    // destroyed the DB
L
Lei Jin 已提交
3057
    Reopen(options);
I
Igor Canadi 已提交
3058 3059
    ASSERT_EQ(two, Get("foo"));
    ASSERT_EQ(one, Get("bar"));
3060 3061

    // Recovery will fail if DB directory doesn't exist.
L
Lei Jin 已提交
3062
    Destroy(options);
3063 3064 3065 3066 3067 3068 3069 3070 3071
    // copy the logs from backup back to wal dir
    env_->CreateDirIfMissing(options.wal_dir);
    for (auto& log : logs) {
      if (log != ".." && log != ".") {
        CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log);
        // we won't be needing this file no more
        env_->DeleteFile(backup_logs + "/" + log);
      }
    }
L
Lei Jin 已提交
3072
    Status s = TryReopen(options);
3073
    ASSERT_TRUE(!s.ok());
3074
  } while (ChangeOptions(kSkipHashCuckoo));
I
Igor Canadi 已提交
3075 3076
}

I
Igor Sugak 已提交
3077
TEST_F(DBTest, RollLog) {
H
heyongqiang 已提交
3078
  do {
L
Lei Jin 已提交
3079
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
3080 3081
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_OK(Put(1, "baz", "v5"));
H
heyongqiang 已提交
3082

L
Lei Jin 已提交
3083
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
H
heyongqiang 已提交
3084
    for (int i = 0; i < 10; i++) {
L
Lei Jin 已提交
3085
      ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
H
heyongqiang 已提交
3086
    }
3087
    ASSERT_OK(Put(1, "foo", "v4"));
H
heyongqiang 已提交
3088
    for (int i = 0; i < 10; i++) {
L
Lei Jin 已提交
3089
      ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
H
heyongqiang 已提交
3090 3091 3092 3093
    }
  } while (ChangeOptions());
}

I
Igor Sugak 已提交
3094
TEST_F(DBTest, WAL) {
3095
  do {
L
Lei Jin 已提交
3096
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
3097 3098
    WriteOptions writeOpt = WriteOptions();
    writeOpt.disableWAL = true;
3099 3100
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
H
heyongqiang 已提交
3101

L
Lei Jin 已提交
3102
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
3103 3104
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("v1", Get(1, "bar"));
H
heyongqiang 已提交
3105

3106
    writeOpt.disableWAL = false;
3107
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v2"));
3108
    writeOpt.disableWAL = true;
3109
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v2"));
H
heyongqiang 已提交
3110

L
Lei Jin 已提交
3111
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
3112
    // Both value's should be present.
3113 3114
    ASSERT_EQ("v2", Get(1, "bar"));
    ASSERT_EQ("v2", Get(1, "foo"));
H
heyongqiang 已提交
3115

3116
    writeOpt.disableWAL = true;
3117
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v3"));
3118
    writeOpt.disableWAL = false;
3119
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v3"));
H
heyongqiang 已提交
3120

L
Lei Jin 已提交
3121
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
3122
    // again both values should be present.
3123 3124
    ASSERT_EQ("v3", Get(1, "foo"));
    ASSERT_EQ("v3", Get(1, "bar"));
3125
  } while (ChangeCompactOptions());
H
heyongqiang 已提交
3126 3127
}

I
Igor Sugak 已提交
3128
TEST_F(DBTest, CheckLock) {
3129 3130 3131
  do {
    DB* localdb;
    Options options = CurrentOptions();
L
Lei Jin 已提交
3132
    ASSERT_OK(TryReopen(options));
3133 3134

    // second open should fail
3135
    ASSERT_TRUE(!(DB::Open(options, dbname_, &localdb)).ok());
3136
  } while (ChangeCompactOptions());
3137 3138
}

I
Igor Sugak 已提交
3139
TEST_F(DBTest, FlushMultipleMemtable) {
3140 3141 3142 3143 3144 3145
  do {
    Options options = CurrentOptions();
    WriteOptions writeOpt = WriteOptions();
    writeOpt.disableWAL = true;
    options.max_write_buffer_number = 4;
    options.min_write_buffer_number_to_merge = 3;
3146
    options.max_write_buffer_number_to_maintain = -1;
L
Lei Jin 已提交
3147
    CreateAndReopenWithCF({"pikachu"}, options);
3148 3149 3150 3151 3152 3153 3154
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));

    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("v1", Get(1, "bar"));
    ASSERT_OK(Flush(1));
3155 3156 3157
  } while (ChangeCompactOptions());
}

I
Igor Sugak 已提交
3158
TEST_F(DBTest, NumImmutableMemTable) {
3159 3160 3161 3162 3163 3164
  do {
    Options options = CurrentOptions();
    WriteOptions writeOpt = WriteOptions();
    writeOpt.disableWAL = true;
    options.max_write_buffer_number = 4;
    options.min_write_buffer_number_to_merge = 3;
3165
    options.max_write_buffer_number_to_maintain = 0;
3166
    options.write_buffer_size = 1000000;
L
Lei Jin 已提交
3167
    CreateAndReopenWithCF({"pikachu"}, options);
3168

3169
    std::string big_value(1000000 * 2, 'x');
3170
    std::string num;
3171
    SetPerfLevel(kEnableTime);;
3172
    ASSERT_TRUE(GetPerfLevel() == kEnableTime);
3173

3174 3175 3176
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k1", big_value));
    ASSERT_TRUE(dbfull()->GetProperty(handles_[1],
                                      "rocksdb.num-immutable-mem-table", &num));
3177
    ASSERT_EQ(num, "0");
3178 3179 3180
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-active-mem-table", &num));
    ASSERT_EQ(num, "1");
3181
    perf_context.Reset();
3182
    Get(1, "k1");
3183
    ASSERT_EQ(1, (int) perf_context.get_from_memtable_count);
3184

3185 3186 3187
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value));
    ASSERT_TRUE(dbfull()->GetProperty(handles_[1],
                                      "rocksdb.num-immutable-mem-table", &num));
3188
    ASSERT_EQ(num, "1");
3189 3190 3191 3192 3193 3194 3195
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-active-mem-table", &num));
    ASSERT_EQ(num, "1");
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-imm-mem-tables", &num));
    ASSERT_EQ(num, "1");

3196
    perf_context.Reset();
3197
    Get(1, "k1");
3198 3199
    ASSERT_EQ(2, (int) perf_context.get_from_memtable_count);
    perf_context.Reset();
3200
    Get(1, "k2");
3201
    ASSERT_EQ(1, (int) perf_context.get_from_memtable_count);
3202

3203
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k3", big_value));
3204 3205
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.cur-size-active-mem-table", &num));
3206 3207
    ASSERT_TRUE(dbfull()->GetProperty(handles_[1],
                                      "rocksdb.num-immutable-mem-table", &num));
3208
    ASSERT_EQ(num, "2");
3209 3210 3211 3212 3213 3214
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-active-mem-table", &num));
    ASSERT_EQ(num, "1");
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-imm-mem-tables", &num));
    ASSERT_EQ(num, "2");
3215
    perf_context.Reset();
3216
    Get(1, "k2");
3217 3218
    ASSERT_EQ(2, (int) perf_context.get_from_memtable_count);
    perf_context.Reset();
3219
    Get(1, "k3");
3220 3221
    ASSERT_EQ(1, (int) perf_context.get_from_memtable_count);
    perf_context.Reset();
3222
    Get(1, "k1");
3223
    ASSERT_EQ(3, (int) perf_context.get_from_memtable_count);
3224

3225 3226 3227
    ASSERT_OK(Flush(1));
    ASSERT_TRUE(dbfull()->GetProperty(handles_[1],
                                      "rocksdb.num-immutable-mem-table", &num));
3228
    ASSERT_EQ(num, "0");
3229 3230
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.cur-size-active-mem-table", &num));
S
sdong 已提交
3231
    // "200" is the size of the metadata of an empty skiplist, this would
3232
    // break if we change the default skiplist implementation
S
sdong 已提交
3233
    ASSERT_EQ(num, "200");
3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262

    uint64_t int_num;
    uint64_t base_total_size;
    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.estimate-num-keys", &base_total_size));

    ASSERT_OK(dbfull()->Delete(writeOpt, handles_[1], "k2"));
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k3", ""));
    ASSERT_OK(dbfull()->Delete(writeOpt, handles_[1], "k3"));
    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.num-deletes-active-mem-table", &int_num));
    ASSERT_EQ(int_num, 2U);
    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.num-entries-active-mem-table", &int_num));
    ASSERT_EQ(int_num, 3U);

    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value));
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value));
    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.num-entries-imm-mem-tables", &int_num));
    ASSERT_EQ(int_num, 4U);
    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.num-deletes-imm-mem-tables", &int_num));
    ASSERT_EQ(int_num, 2U);

    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.estimate-num-keys", &int_num));
    ASSERT_EQ(int_num, base_total_size + 1);

3263
    SetPerfLevel(kDisable);
3264
    ASSERT_TRUE(GetPerfLevel() == kDisable);
3265 3266 3267
  } while (ChangeCompactOptions());
}

3268 3269
class SleepingBackgroundTask {
 public:
I
Igor Canadi 已提交
3270 3271
  SleepingBackgroundTask()
      : bg_cv_(&mutex_), should_sleep_(true), done_with_sleep_(false) {}
3272 3273 3274 3275 3276
  void DoSleep() {
    MutexLock l(&mutex_);
    while (should_sleep_) {
      bg_cv_.Wait();
    }
I
Igor Canadi 已提交
3277 3278
    done_with_sleep_ = true;
    bg_cv_.SignalAll();
3279 3280 3281 3282 3283 3284
  }
  void WakeUp() {
    MutexLock l(&mutex_);
    should_sleep_ = false;
    bg_cv_.SignalAll();
  }
I
Igor Canadi 已提交
3285 3286 3287 3288 3289 3290
  void WaitUntilDone() {
    MutexLock l(&mutex_);
    while (!done_with_sleep_) {
      bg_cv_.Wait();
    }
  }
3291 3292 3293 3294 3295 3296 3297 3298 3299

  static void DoSleepTask(void* arg) {
    reinterpret_cast<SleepingBackgroundTask*>(arg)->DoSleep();
  }

 private:
  port::Mutex mutex_;
  port::CondVar bg_cv_;  // Signalled when background work finishes
  bool should_sleep_;
I
Igor Canadi 已提交
3300
  bool done_with_sleep_;
3301 3302
};

I
Igor Sugak 已提交
3303
TEST_F(DBTest, FlushEmptyColumnFamily) {
3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320
  // Block flush thread and disable compaction thread
  env_->SetBackgroundThreads(1, Env::HIGH);
  env_->SetBackgroundThreads(1, Env::LOW);
  SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  SleepingBackgroundTask sleeping_task_high;
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_high,
                 Env::Priority::HIGH);

  Options options = CurrentOptions();
  // disable compaction
  options.disable_auto_compactions = true;
  WriteOptions writeOpt = WriteOptions();
  writeOpt.disableWAL = true;
  options.max_write_buffer_number = 2;
  options.min_write_buffer_number_to_merge = 1;
3321
  options.max_write_buffer_number_to_maintain = 1;
L
Lei Jin 已提交
3322
  CreateAndReopenWithCF({"pikachu"}, options);
3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346

  // Compaction can still go through even if no thread can flush the
  // mem table.
  ASSERT_OK(Flush(0));
  ASSERT_OK(Flush(1));

  // Insert can go through
  ASSERT_OK(dbfull()->Put(writeOpt, handles_[0], "foo", "v1"));
  ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));

  ASSERT_EQ("v1", Get(0, "foo"));
  ASSERT_EQ("v1", Get(1, "bar"));

  sleeping_task_high.WakeUp();
  sleeping_task_high.WaitUntilDone();

  // Flush can still go through.
  ASSERT_OK(Flush(0));
  ASSERT_OK(Flush(1));

  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
}

I
Igor Sugak 已提交
3347
TEST_F(DBTest, GetProperty) {
3348 3349 3350
  // Set sizes to both background thread pool to be 1 and block them.
  env_->SetBackgroundThreads(1, Env::HIGH);
  env_->SetBackgroundThreads(1, Env::LOW);
I
Igor Canadi 已提交
3351
  SleepingBackgroundTask sleeping_task_low;
3352 3353
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
I
Igor Canadi 已提交
3354
  SleepingBackgroundTask sleeping_task_high;
3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_high,
                 Env::Priority::HIGH);

  Options options = CurrentOptions();
  WriteOptions writeOpt = WriteOptions();
  writeOpt.disableWAL = true;
  options.compaction_style = kCompactionStyleUniversal;
  options.level0_file_num_compaction_trigger = 1;
  options.compaction_options_universal.size_ratio = 50;
  options.max_background_compactions = 1;
  options.max_background_flushes = 1;
  options.max_write_buffer_number = 10;
  options.min_write_buffer_number_to_merge = 1;
3368
  options.max_write_buffer_number_to_maintain = 0;
3369
  options.write_buffer_size = 1000000;
L
Lei Jin 已提交
3370
  Reopen(options);
3371 3372 3373

  std::string big_value(1000000 * 2, 'x');
  std::string num;
3374
  uint64_t int_num;
3375 3376
  SetPerfLevel(kEnableTime);

3377 3378 3379 3380
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_EQ(int_num, 0U);

3381 3382 3383 3384 3385 3386 3387
  ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value));
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
  ASSERT_EQ(num, "0");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num));
  ASSERT_EQ(num, "0");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num));
  ASSERT_EQ(num, "0");
S
sdong 已提交
3388 3389
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num));
  ASSERT_EQ(num, "1");
3390 3391 3392 3393 3394
  perf_context.Reset();

  ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value));
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
  ASSERT_EQ(num, "1");
S
sdong 已提交
3395
  ASSERT_OK(dbfull()->Delete(writeOpt, "k-non-existing"));
3396 3397 3398 3399 3400 3401 3402
  ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value));
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
  ASSERT_EQ(num, "2");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num));
  ASSERT_EQ(num, "1");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num));
  ASSERT_EQ(num, "0");
S
sdong 已提交
3403
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num));
3404
  ASSERT_EQ(num, "2");
3405 3406 3407 3408 3409 3410 3411 3412
  // Verify the same set of properties through GetIntProperty
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.num-immutable-mem-table", &int_num));
  ASSERT_EQ(int_num, 2U);
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.mem-table-flush-pending", &int_num));
  ASSERT_EQ(int_num, 1U);
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.compaction-pending", &int_num));
3413
  ASSERT_EQ(int_num, 0U);
3414
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.estimate-num-keys", &int_num));
3415
  ASSERT_EQ(int_num, 2U);
3416

3417 3418 3419 3420
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_EQ(int_num, 0U);

3421
  sleeping_task_high.WakeUp();
I
Igor Canadi 已提交
3422
  sleeping_task_high.WaitUntilDone();
3423 3424 3425 3426 3427 3428 3429 3430 3431
  dbfull()->TEST_WaitForFlushMemTable();

  ASSERT_OK(dbfull()->Put(writeOpt, "k4", big_value));
  ASSERT_OK(dbfull()->Put(writeOpt, "k5", big_value));
  dbfull()->TEST_WaitForFlushMemTable();
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num));
  ASSERT_EQ(num, "0");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num));
  ASSERT_EQ(num, "1");
S
sdong 已提交
3432 3433
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num));
  ASSERT_EQ(num, "4");
3434 3435 3436 3437 3438

  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_GT(int_num, 0U);

3439
  sleeping_task_low.WakeUp();
I
Igor Canadi 已提交
3440
  sleeping_task_low.WaitUntilDone();
3441 3442 3443

  dbfull()->TEST_WaitForFlushMemTable();
  options.max_open_files = 10;
L
Lei Jin 已提交
3444
  Reopen(options);
3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456
  // After reopening, no table reader is loaded, so no memory for table readers
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_EQ(int_num, 0U);
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.estimate-num-keys", &int_num));
  ASSERT_GT(int_num, 0U);

  // After reading a key, at least one table reader is loaded.
  Get("k5");
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_GT(int_num, 0U);
3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493

  // Test rocksdb.num-live-versions
  {
    options.level0_file_num_compaction_trigger = 20;
    Reopen(options);
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num));
    ASSERT_EQ(int_num, 1U);

    // Use an iterator to hold current version
    std::unique_ptr<Iterator> iter1(dbfull()->NewIterator(ReadOptions()));

    ASSERT_OK(dbfull()->Put(writeOpt, "k6", big_value));
    Flush();
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num));
    ASSERT_EQ(int_num, 2U);

    // Use an iterator to hold current version
    std::unique_ptr<Iterator> iter2(dbfull()->NewIterator(ReadOptions()));

    ASSERT_OK(dbfull()->Put(writeOpt, "k7", big_value));
    Flush();
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num));
    ASSERT_EQ(int_num, 3U);

    iter2.reset();
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num));
    ASSERT_EQ(int_num, 2U);

    iter1.reset();
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num));
    ASSERT_EQ(int_num, 1U);
  }
3494 3495
}

I
Igor Sugak 已提交
3496
TEST_F(DBTest, FLUSH) {
3497
  do {
L
Lei Jin 已提交
3498
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
3499 3500
    WriteOptions writeOpt = WriteOptions();
    writeOpt.disableWAL = true;
3501
    SetPerfLevel(kEnableTime);;
3502
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
3503
    // this will now also flush the last 2 writes
3504 3505
    ASSERT_OK(Flush(1));
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
H
heyongqiang 已提交
3506

3507
    perf_context.Reset();
3508
    Get(1, "foo");
3509 3510
    ASSERT_TRUE((int) perf_context.get_from_output_files_time > 0);

L
Lei Jin 已提交
3511
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
3512
    ASSERT_EQ("v1", Get(1, "foo"));
3513
    ASSERT_EQ("v1", Get(1, "bar"));
H
heyongqiang 已提交
3514

3515
    writeOpt.disableWAL = true;
3516 3517 3518
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v2"));
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v2"));
    ASSERT_OK(Flush(1));
H
heyongqiang 已提交
3519

L
Lei Jin 已提交
3520
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
3521
    ASSERT_EQ("v2", Get(1, "bar"));
3522
    perf_context.Reset();
3523
    ASSERT_EQ("v2", Get(1, "foo"));
3524
    ASSERT_TRUE((int) perf_context.get_from_output_files_time > 0);
H
heyongqiang 已提交
3525

3526
    writeOpt.disableWAL = false;
3527 3528 3529
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v3"));
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v3"));
    ASSERT_OK(Flush(1));
H
heyongqiang 已提交
3530

L
Lei Jin 已提交
3531
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
3532 3533
    // 'foo' should be there because its put
    // has WAL enabled.
3534 3535
    ASSERT_EQ("v3", Get(1, "foo"));
    ASSERT_EQ("v3", Get(1, "bar"));
3536 3537

    SetPerfLevel(kDisable);
3538
  } while (ChangeCompactOptions());
H
heyongqiang 已提交
3539 3540
}

I
Igor Sugak 已提交
3541
TEST_F(DBTest, RecoveryWithEmptyLog) {
S
Sanjay Ghemawat 已提交
3542
  do {
L
Lei Jin 已提交
3543
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
3544 3545
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_OK(Put(1, "foo", "v2"));
L
Lei Jin 已提交
3546 3547
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
3548
    ASSERT_OK(Put(1, "foo", "v3"));
L
Lei Jin 已提交
3549
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
3550
    ASSERT_EQ("v3", Get(1, "foo"));
S
Sanjay Ghemawat 已提交
3551
  } while (ChangeOptions());
J
jorlow@chromium.org 已提交
3552 3553
}

3554 3555
// Check that writes done during a memtable compaction are recovered
// if the database is shutdown during the memtable compaction.
I
Igor Sugak 已提交
3556
TEST_F(DBTest, RecoverDuringMemtableCompaction) {
S
Sanjay Ghemawat 已提交
3557
  do {
3558
    Options options;
S
Sanjay Ghemawat 已提交
3559 3560
    options.env = env_;
    options.write_buffer_size = 1000000;
3561
    options = CurrentOptions(options);
L
Lei Jin 已提交
3562
    CreateAndReopenWithCF({"pikachu"}, options);
3563

S
Sanjay Ghemawat 已提交
3564
    // Trigger a long memtable compaction and reopen the database during it
3565 3566 3567 3568 3569
    ASSERT_OK(Put(1, "foo", "v1"));  // Goes to 1st log file
    ASSERT_OK(Put(1, "big1", std::string(10000000, 'x')));  // Fills memtable
    ASSERT_OK(Put(1, "big2", std::string(1000, 'y')));  // Triggers compaction
    ASSERT_OK(Put(1, "bar", "v2"));                     // Goes to new log file

L
Lei Jin 已提交
3570
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
3571 3572 3573 3574
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("v2", Get(1, "bar"));
    ASSERT_EQ(std::string(10000000, 'x'), Get(1, "big1"));
    ASSERT_EQ(std::string(1000, 'y'), Get(1, "big2"));
S
Sanjay Ghemawat 已提交
3575
  } while (ChangeOptions());
3576 3577
}

3578 3579 3580
// false positive TSAN report on shared_ptr --
// https://groups.google.com/forum/#!topic/thread-sanitizer/vz_s-t226Vg
#ifndef ROCKSDB_TSAN_RUN
I
Igor Sugak 已提交
3581
TEST_F(DBTest, FlushSchedule) {
I
Igor Canadi 已提交
3582 3583 3584 3585 3586
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.level0_stop_writes_trigger = 1 << 10;
  options.level0_slowdown_writes_trigger = 1 << 10;
  options.min_write_buffer_number_to_merge = 1;
3587
  options.max_write_buffer_number_to_maintain = 1;
I
Igor Canadi 已提交
3588 3589
  options.max_write_buffer_number = 2;
  options.write_buffer_size = 100 * 1000;
L
Lei Jin 已提交
3590
  CreateAndReopenWithCF({"pikachu"}, options);
I
Igor Canadi 已提交
3591 3592
  std::vector<std::thread> threads;

I
Igor Canadi 已提交
3593
  std::atomic<int> thread_num(0);
I
Igor Canadi 已提交
3594 3595 3596 3597 3598 3599
  // each column family will have 5 thread, each thread generating 2 memtables.
  // each column family should end up with 10 table files
  for (int i = 0; i < 10; ++i) {
    threads.emplace_back([&]() {
      int a = thread_num.fetch_add(1);
      Random rnd(a);
I
Igor Canadi 已提交
3600
      WriteOptions wo;
I
Igor Canadi 已提交
3601 3602
      // this should fill up 2 memtables
      for (int k = 0; k < 5000; ++k) {
I
Igor Canadi 已提交
3603
        ASSERT_OK(db_->Put(wo, handles_[a & 1], RandomString(&rnd, 13), ""));
I
Igor Canadi 已提交
3604 3605 3606 3607 3608 3609 3610 3611
      }
    });
  }

  for (auto& t : threads) {
    t.join();
  }

I
Igor Canadi 已提交
3612 3613 3614 3615 3616 3617
  auto default_tables = GetNumberOfSstFilesForColumnFamily(db_, "default");
  auto pikachu_tables = GetNumberOfSstFilesForColumnFamily(db_, "pikachu");
  ASSERT_LE(default_tables, static_cast<uint64_t>(10));
  ASSERT_GT(default_tables, static_cast<uint64_t>(0));
  ASSERT_LE(pikachu_tables, static_cast<uint64_t>(10));
  ASSERT_GT(pikachu_tables, static_cast<uint64_t>(0));
I
Igor Canadi 已提交
3618
}
3619
#endif  // enabled only if not TSAN run
I
Igor Canadi 已提交
3620

I
Igor Sugak 已提交
3621
TEST_F(DBTest, MinorCompactionsHappen) {
3622
  do {
3623
    Options options;
3624
    options.write_buffer_size = 10000;
3625
    options = CurrentOptions(options);
L
Lei Jin 已提交
3626
    CreateAndReopenWithCF({"pikachu"}, options);
J
jorlow@chromium.org 已提交
3627

3628
    const int N = 500;
J
jorlow@chromium.org 已提交
3629

3630
    int starting_num_tables = TotalTableFiles(1);
3631
    for (int i = 0; i < N; i++) {
3632
      ASSERT_OK(Put(1, Key(i), Key(i) + std::string(1000, 'v')));
3633
    }
3634
    int ending_num_tables = TotalTableFiles(1);
3635
    ASSERT_GT(ending_num_tables, starting_num_tables);
J
jorlow@chromium.org 已提交
3636

3637
    for (int i = 0; i < N; i++) {
3638
      ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(1, Key(i)));
3639
    }
J
jorlow@chromium.org 已提交
3640

L
Lei Jin 已提交
3641
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
J
jorlow@chromium.org 已提交
3642

3643
    for (int i = 0; i < N; i++) {
3644
      ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(1, Key(i)));
3645 3646
    }
  } while (ChangeCompactOptions());
J
jorlow@chromium.org 已提交
3647 3648
}

I
Igor Sugak 已提交
3649
TEST_F(DBTest, ManifestRollOver) {
3650
  do {
3651
    Options options;
3652
    options.max_manifest_file_size = 10 ;  // 10 bytes
3653
    options = CurrentOptions(options);
L
Lei Jin 已提交
3654
    CreateAndReopenWithCF({"pikachu"}, options);
3655
    {
3656 3657 3658 3659 3660 3661
      ASSERT_OK(Put(1, "manifest_key1", std::string(1000, '1')));
      ASSERT_OK(Put(1, "manifest_key2", std::string(1000, '2')));
      ASSERT_OK(Put(1, "manifest_key3", std::string(1000, '3')));
      uint64_t manifest_before_flush = dbfull()->TEST_Current_Manifest_FileNo();
      ASSERT_OK(Flush(1));  // This should trigger LogAndApply.
      uint64_t manifest_after_flush = dbfull()->TEST_Current_Manifest_FileNo();
M
Mayank Agarwal 已提交
3662
      ASSERT_GT(manifest_after_flush, manifest_before_flush);
L
Lei Jin 已提交
3663
      ReopenWithColumnFamilies({"default", "pikachu"}, options);
3664
      ASSERT_GT(dbfull()->TEST_Current_Manifest_FileNo(), manifest_after_flush);
3665
      // check if a new manifest file got inserted or not.
3666 3667 3668
      ASSERT_EQ(std::string(1000, '1'), Get(1, "manifest_key1"));
      ASSERT_EQ(std::string(1000, '2'), Get(1, "manifest_key2"));
      ASSERT_EQ(std::string(1000, '3'), Get(1, "manifest_key3"));
3669 3670
    }
  } while (ChangeCompactOptions());
A
Abhishek Kona 已提交
3671 3672
}

I
Igor Sugak 已提交
3673
TEST_F(DBTest, IdentityAcrossRestarts) {
M
Mayank Agarwal 已提交
3674
  do {
3675 3676
    std::string id1;
    ASSERT_OK(db_->GetDbIdentity(id1));
M
Mayank Agarwal 已提交
3677 3678

    Options options = CurrentOptions();
L
Lei Jin 已提交
3679
    Reopen(options);
3680 3681
    std::string id2;
    ASSERT_OK(db_->GetDbIdentity(id2));
M
Mayank Agarwal 已提交
3682
    // id1 should match id2 because identity was not regenerated
3683
    ASSERT_EQ(id1.compare(id2), 0);
M
Mayank Agarwal 已提交
3684

3685
    std::string idfilename = IdentityFileName(dbname_);
M
Mayank Agarwal 已提交
3686
    ASSERT_OK(env_->DeleteFile(idfilename));
L
Lei Jin 已提交
3687
    Reopen(options);
3688 3689 3690 3691
    std::string id3;
    ASSERT_OK(db_->GetDbIdentity(id3));
    // id1 should NOT match id3 because identity was regenerated
    ASSERT_NE(id1.compare(id3), 0);
M
Mayank Agarwal 已提交
3692 3693 3694
  } while (ChangeCompactOptions());
}

I
Igor Sugak 已提交
3695
TEST_F(DBTest, RecoverWithLargeLog) {
3696 3697 3698
  do {
    {
      Options options = CurrentOptions();
L
Lei Jin 已提交
3699
      CreateAndReopenWithCF({"pikachu"}, options);
3700 3701 3702 3703 3704
      ASSERT_OK(Put(1, "big1", std::string(200000, '1')));
      ASSERT_OK(Put(1, "big2", std::string(200000, '2')));
      ASSERT_OK(Put(1, "small3", std::string(10, '3')));
      ASSERT_OK(Put(1, "small4", std::string(10, '4')));
      ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
3705 3706 3707 3708
    }

    // Make sure that if we re-open with a small write buffer size that
    // we flush table files in the middle of a large log file.
3709
    Options options;
3710
    options.write_buffer_size = 100000;
3711
    options = CurrentOptions(options);
L
Lei Jin 已提交
3712
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
3713 3714 3715 3716 3717 3718
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 3);
    ASSERT_EQ(std::string(200000, '1'), Get(1, "big1"));
    ASSERT_EQ(std::string(200000, '2'), Get(1, "big2"));
    ASSERT_EQ(std::string(10, '3'), Get(1, "small3"));
    ASSERT_EQ(std::string(10, '4'), Get(1, "small4"));
    ASSERT_GT(NumTableFilesAtLevel(0, 1), 1);
3719
  } while (ChangeCompactOptions());
J
jorlow@chromium.org 已提交
3720 3721
}

I
Igor Sugak 已提交
3722
TEST_F(DBTest, CompactionsGenerateMultipleFiles) {
3723
  Options options;
J
jorlow@chromium.org 已提交
3724
  options.write_buffer_size = 100000000;        // Large write buffer
3725
  options = CurrentOptions(options);
L
Lei Jin 已提交
3726
  CreateAndReopenWithCF({"pikachu"}, options);
J
jorlow@chromium.org 已提交
3727 3728 3729 3730

  Random rnd(301);

  // Write 8MB (80 values, each 100K)
3731
  ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
J
jorlow@chromium.org 已提交
3732 3733 3734
  std::vector<std::string> values;
  for (int i = 0; i < 80; i++) {
    values.push_back(RandomString(&rnd, 100000));
3735
    ASSERT_OK(Put(1, Key(i), values[i]));
J
jorlow@chromium.org 已提交
3736 3737 3738
  }

  // Reopening moves updates to level-0
L
Lei Jin 已提交
3739
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
3740 3741
  dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1],
                              true /* disallow trivial move */);
J
jorlow@chromium.org 已提交
3742

3743 3744
  ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  ASSERT_GT(NumTableFilesAtLevel(1, 1), 1);
J
jorlow@chromium.org 已提交
3745
  for (int i = 0; i < 80; i++) {
3746
    ASSERT_EQ(Get(1, Key(i)), values[i]);
J
jorlow@chromium.org 已提交
3747 3748 3749
  }
}

3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897
TEST_F(DBTest, TrivialMoveOneFile) {
  int32_t trivial_move = 0;
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* arg) { trivial_move++; });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  Options options;
  options.write_buffer_size = 100000000;
  options = CurrentOptions(options);
  DestroyAndReopen(options);

  int32_t num_keys = 80;
  int32_t value_size = 100 * 1024;  // 100 KB

  Random rnd(301);
  std::vector<std::string> values;
  for (int i = 0; i < num_keys; i++) {
    values.push_back(RandomString(&rnd, value_size));
    ASSERT_OK(Put(Key(i), values[i]));
  }

  // Reopening moves updates to L0
  Reopen(options);
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 1);  // 1 file in L0
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0);  // 0 files in L1

  std::vector<LiveFileMetaData> metadata;
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(metadata.size(), 1U);
  LiveFileMetaData level0_file = metadata[0];  // L0 file meta

  // Compaction will initiate a trivial move from L0 to L1
  dbfull()->CompactRange(nullptr, nullptr);

  // File moved From L0 to L1
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);  // 0 files in L0
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 1);  // 1 file in L1

  metadata.clear();
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(metadata.size(), 1U);
  ASSERT_EQ(metadata[0].name /* level1_file.name */, level0_file.name);
  ASSERT_EQ(metadata[0].size /* level1_file.size */, level0_file.size);

  for (int i = 0; i < num_keys; i++) {
    ASSERT_EQ(Get(Key(i)), values[i]);
  }

  ASSERT_EQ(trivial_move, 1);
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBTest, TrivialMoveNonOverlappingFiles) {
  int32_t trivial_move = 0;
  int32_t non_trivial_move = 0;
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* arg) { trivial_move++; });
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* arg) { non_trivial_move++; });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.write_buffer_size = 10 * 1024 * 1024;

  DestroyAndReopen(options);
  // non overlapping ranges
  std::vector<std::pair<int32_t, int32_t>> ranges = {
    {100, 199},
    {300, 399},
    {0, 99},
    {200, 299},
    {600, 699},
    {400, 499},
    {500, 550},
    {551, 599},
  };
  int32_t value_size = 10 * 1024;  // 10 KB

  Random rnd(301);
  std::map<int32_t, std::string> values;
  for (uint32_t i = 0; i < ranges.size(); i++) {
    for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
      values[j] = RandomString(&rnd, value_size);
      ASSERT_OK(Put(Key(j), values[j]));
    }
    ASSERT_OK(Flush());
  }

  int32_t level0_files = NumTableFilesAtLevel(0, 0);
  ASSERT_EQ(level0_files, ranges.size());    // Multiple files in L0
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0);  // No files in L1

  // Since data is non-overlapping we expect compaction to initiate
  // a trivial move
  db_->CompactRange(nullptr, nullptr);
  // We expect that all the files were trivially moved from L0 to L1
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 0) /* level1_files */, level0_files);

  for (uint32_t i = 0; i < ranges.size(); i++) {
    for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
      ASSERT_EQ(Get(Key(j)), values[j]);
    }
  }

  ASSERT_EQ(trivial_move, 1);
  ASSERT_EQ(non_trivial_move, 0);

  trivial_move = 0;
  non_trivial_move = 0;
  values.clear();
  DestroyAndReopen(options);
  // Same ranges as above but overlapping
  ranges = {
    {100, 199},
    {300, 399},
    {0, 99},
    {200, 299},
    {600, 699},
    {400, 499},
    {500, 560},  // this range overlap with the next one
    {551, 599},
  };
  for (uint32_t i = 0; i < ranges.size(); i++) {
    for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
      values[j] = RandomString(&rnd, value_size);
      ASSERT_OK(Put(Key(j), values[j]));
    }
    ASSERT_OK(Flush());
  }

  db_->CompactRange(nullptr, nullptr);

  for (uint32_t i = 0; i < ranges.size(); i++) {
    for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
      ASSERT_EQ(Get(Key(j)), values[j]);
    }
  }
  ASSERT_EQ(trivial_move, 0);
  ASSERT_EQ(non_trivial_move, 1);

  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}

I
Igor Sugak 已提交
3898
TEST_F(DBTest, CompactionTrigger) {
3899
  Options options;
3900 3901 3902 3903
  options.write_buffer_size = 100<<10; //100KB
  options.num_levels = 3;
  options.max_mem_compaction_level = 0;
  options.level0_file_num_compaction_trigger = 3;
3904
  options = CurrentOptions(options);
L
Lei Jin 已提交
3905
  CreateAndReopenWithCF({"pikachu"}, options);
3906 3907 3908

  Random rnd(301);

3909
  for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
3910
       num++) {
3911 3912 3913 3914
    std::vector<std::string> values;
    // Write 120KB (12 values, each 10K)
    for (int i = 0; i < 12; i++) {
      values.push_back(RandomString(&rnd, 10000));
3915
      ASSERT_OK(Put(1, Key(i), values[i]));
3916
    }
3917 3918
    dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), num + 1);
3919 3920 3921 3922 3923 3924
  }

  //generate one more file in level-0, and should trigger level-0 compaction
  std::vector<std::string> values;
  for (int i = 0; i < 12; i++) {
    values.push_back(RandomString(&rnd, 10000));
3925
    ASSERT_OK(Put(1, Key(i), values[i]));
3926 3927 3928
  }
  dbfull()->TEST_WaitForCompact();

3929 3930
  ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 1), 1);
3931 3932
}

3933 3934 3935 3936 3937 3938 3939 3940 3941
namespace {
static const int kCDTValueSize = 1000;
static const int kCDTKeysPerBuffer = 4;
static const int kCDTNumLevels = 8;
Options DeletionTriggerOptions() {
  Options options;
  options.compression = kNoCompression;
  options.write_buffer_size = kCDTKeysPerBuffer * (kCDTValueSize + 24);
  options.min_write_buffer_number_to_merge = 1;
3942
  options.max_write_buffer_number_to_maintain = 0;
3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955
  options.num_levels = kCDTNumLevels;
  options.max_mem_compaction_level = 0;
  options.level0_file_num_compaction_trigger = 1;
  options.target_file_size_base = options.write_buffer_size * 2;
  options.target_file_size_multiplier = 2;
  options.max_bytes_for_level_base =
      options.target_file_size_base * options.target_file_size_multiplier;
  options.max_bytes_for_level_multiplier = 2;
  options.disable_auto_compactions = false;
  return options;
}
}  // anonymous namespace

I
Igor Sugak 已提交
3956
TEST_F(DBTest, CompactionDeletionTrigger) {
3957 3958
  for (int tid = 0; tid < 2; ++tid) {
    uint64_t db_size[2];
3959 3960 3961 3962 3963 3964 3965
    Options options = CurrentOptions(DeletionTriggerOptions());

    if (tid == 1) {
      // second pass with universal compaction
      options.compaction_style = kCompactionStyleUniversal;
      options.num_levels = 1;
    }
3966

L
Lei Jin 已提交
3967
    DestroyAndReopen(options);
3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991
    Random rnd(301);

    const int kTestSize = kCDTKeysPerBuffer * 512;
    std::vector<std::string> values;
    for (int k = 0; k < kTestSize; ++k) {
      values.push_back(RandomString(&rnd, kCDTValueSize));
      ASSERT_OK(Put(Key(k), values[k]));
    }
    dbfull()->TEST_WaitForFlushMemTable();
    dbfull()->TEST_WaitForCompact();
    db_size[0] = Size(Key(0), Key(kTestSize - 1));

    for (int k = 0; k < kTestSize; ++k) {
      ASSERT_OK(Delete(Key(k)));
    }
    dbfull()->TEST_WaitForFlushMemTable();
    dbfull()->TEST_WaitForCompact();
    db_size[1] = Size(Key(0), Key(kTestSize - 1));

    // must have much smaller db size.
    ASSERT_GT(db_size[0] / 3, db_size[1]);
  }
}

I
Igor Sugak 已提交
3992
TEST_F(DBTest, CompactionDeletionTriggerReopen) {
3993 3994
  for (int tid = 0; tid < 2; ++tid) {
    uint64_t db_size[3];
3995
    Options options = CurrentOptions(DeletionTriggerOptions());
3996

3997 3998 3999 4000 4001 4002
    if (tid == 1) {
      // second pass with universal compaction
      options.compaction_style = kCompactionStyleUniversal;
      options.num_levels = 1;
    }

L
Lei Jin 已提交
4003
    DestroyAndReopen(options);
4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020
    Random rnd(301);

    // round 1 --- insert key/value pairs.
    const int kTestSize = kCDTKeysPerBuffer * 512;
    std::vector<std::string> values;
    for (int k = 0; k < kTestSize; ++k) {
      values.push_back(RandomString(&rnd, kCDTValueSize));
      ASSERT_OK(Put(Key(k), values[k]));
    }
    dbfull()->TEST_WaitForFlushMemTable();
    dbfull()->TEST_WaitForCompact();
    db_size[0] = Size(Key(0), Key(kTestSize - 1));
    Close();

    // round 2 --- disable auto-compactions and issue deletions.
    options.create_if_missing = false;
    options.disable_auto_compactions = true;
L
Lei Jin 已提交
4021
    Reopen(options);
4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034

    for (int k = 0; k < kTestSize; ++k) {
      ASSERT_OK(Delete(Key(k)));
    }
    db_size[1] = Size(Key(0), Key(kTestSize - 1));
    Close();
    // as auto_compaction is off, we shouldn't see too much reduce
    // in db size.
    ASSERT_LT(db_size[0] / 3, db_size[1]);

    // round 3 --- reopen db with auto_compaction on and see if
    // deletion compensation still work.
    options.disable_auto_compactions = false;
L
Lei Jin 已提交
4035
    Reopen(options);
4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047
    // insert relatively small amount of data to trigger auto compaction.
    for (int k = 0; k < kTestSize / 10; ++k) {
      ASSERT_OK(Put(Key(k), values[k]));
    }
    dbfull()->TEST_WaitForFlushMemTable();
    dbfull()->TEST_WaitForCompact();
    db_size[2] = Size(Key(0), Key(kTestSize - 1));
    // this time we're expecting significant drop in size.
    ASSERT_GT(db_size[0] / 3, db_size[2]);
  }
}

4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076
// This is a static filter used for filtering
// kvs during the compaction process.
static int cfilter_count;
static std::string NEW_VALUE = "NewValue";

class KeepFilter : public CompactionFilter {
 public:
  virtual bool Filter(int level, const Slice& key, const Slice& value,
                      std::string* new_value, bool* value_changed) const
      override {
    cfilter_count++;
    return false;
  }

  virtual const char* Name() const override { return "KeepFilter"; }
};

class DeleteFilter : public CompactionFilter {
 public:
  virtual bool Filter(int level, const Slice& key, const Slice& value,
                      std::string* new_value, bool* value_changed) const
      override {
    cfilter_count++;
    return true;
  }

  virtual const char* Name() const override { return "DeleteFilter"; }
};

4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092
class DelayFilter : public CompactionFilter {
 public:
  explicit DelayFilter(DBTest* d) : db_test(d) {}
  virtual bool Filter(int level, const Slice& key, const Slice& value,
                      std::string* new_value,
                      bool* value_changed) const override {
    db_test->env_->addon_time_ += 1000;
    return true;
  }

  virtual const char* Name() const override { return "DelayFilter"; }

 private:
  DBTest* db_test;
};

4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108
class ConditionalFilter : public CompactionFilter {
 public:
  explicit ConditionalFilter(const std::string* filtered_value)
      : filtered_value_(filtered_value) {}
  virtual bool Filter(int level, const Slice& key, const Slice& value,
                      std::string* new_value,
                      bool* value_changed) const override {
    return value.ToString() == *filtered_value_;
  }

  virtual const char* Name() const override { return "ConditionalFilter"; }

 private:
  const std::string* filtered_value_;
};

4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130
class ChangeFilter : public CompactionFilter {
 public:
  explicit ChangeFilter() {}

  virtual bool Filter(int level, const Slice& key, const Slice& value,
                      std::string* new_value, bool* value_changed) const
      override {
    assert(new_value != nullptr);
    *new_value = NEW_VALUE;
    *value_changed = true;
    return false;
  }

  virtual const char* Name() const override { return "ChangeFilter"; }
};

class KeepFilterFactory : public CompactionFilterFactory {
 public:
  explicit KeepFilterFactory(bool check_context = false)
      : check_context_(check_context) {}

  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
4131
      const CompactionFilter::Context& context) override {
4132
    if (check_context_) {
4133 4134
      EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction);
      EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction);
4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147
    }
    return std::unique_ptr<CompactionFilter>(new KeepFilter());
  }

  virtual const char* Name() const override { return "KeepFilterFactory"; }
  bool check_context_;
  std::atomic_bool expect_full_compaction_;
  std::atomic_bool expect_manual_compaction_;
};

class DeleteFilterFactory : public CompactionFilterFactory {
 public:
  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
4148
      const CompactionFilter::Context& context) override {
4149 4150 4151 4152 4153 4154 4155 4156 4157 4158
    if (context.is_manual_compaction) {
      return std::unique_ptr<CompactionFilter>(new DeleteFilter());
    } else {
      return std::unique_ptr<CompactionFilter>(nullptr);
    }
  }

  virtual const char* Name() const override { return "DeleteFilterFactory"; }
};

4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172
class DelayFilterFactory : public CompactionFilterFactory {
 public:
  explicit DelayFilterFactory(DBTest* d) : db_test(d) {}
  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
      const CompactionFilter::Context& context) override {
    return std::unique_ptr<CompactionFilter>(new DelayFilter(db_test));
  }

  virtual const char* Name() const override { return "DelayFilterFactory"; }

 private:
  DBTest* db_test;
};

4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191
class ConditionalFilterFactory : public CompactionFilterFactory {
 public:
  explicit ConditionalFilterFactory(const Slice& filtered_value)
      : filtered_value_(filtered_value.ToString()) {}

  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
      const CompactionFilter::Context& context) override {
    return std::unique_ptr<CompactionFilter>(
        new ConditionalFilter(&filtered_value_));
  }

  virtual const char* Name() const override {
    return "ConditionalFilterFactory";
  }

 private:
  std::string filtered_value_;
};

4192 4193 4194 4195 4196
class ChangeFilterFactory : public CompactionFilterFactory {
 public:
  explicit ChangeFilterFactory() {}

  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
4197
      const CompactionFilter::Context& context) override {
4198 4199 4200 4201 4202 4203
    return std::unique_ptr<CompactionFilter>(new ChangeFilter());
  }

  virtual const char* Name() const override { return "ChangeFilterFactory"; }
};

S
sdong 已提交
4204 4205 4206 4207 4208 4209 4210 4211 4212 4213
class DBTestUniversalCompactionBase
    : public DBTest,
      public ::testing::WithParamInterface<int> {
 public:
  virtual void SetUp() override { num_levels_ = GetParam(); }
  int num_levels_;
};

class DBTestUniversalCompaction : public DBTestUniversalCompactionBase {};

4214 4215
// TODO(kailiu) The tests on UniversalCompaction has some issues:
//  1. A lot of magic numbers ("11" or "12").
4216
//  2. Made assumption on the memtable flush conditions, which may change from
4217
//     time to time.
S
sdong 已提交
4218
TEST_P(DBTestUniversalCompaction, UniversalCompactionTrigger) {
4219
  Options options;
4220
  options.compaction_style = kCompactionStyleUniversal;
S
sdong 已提交
4221 4222 4223
  options.num_levels = num_levels_;
  options.write_buffer_size = 100 << 10;     // 100KB
  options.target_file_size_base = 32 << 10;  // 32KB
4224 4225
  // trigger compaction if there are >= 4 files
  options.level0_file_num_compaction_trigger = 4;
4226 4227 4228 4229
  KeepFilterFactory* filter = new KeepFilterFactory(true);
  filter->expect_manual_compaction_.store(false);
  options.compaction_filter_factory.reset(filter);

4230
  options = CurrentOptions(options);
S
sdong 已提交
4231
  DestroyAndReopen(options);
L
Lei Jin 已提交
4232
  CreateAndReopenWithCF({"pikachu"}, options);
4233

4234 4235 4236 4237 4238 4239 4240 4241 4242 4243
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "DBTestWritableFile.GetPreallocationStatus", [&](void* arg) {
        ASSERT_TRUE(arg != nullptr);
        size_t preallocation_size = *(static_cast<size_t*>(arg));
        if (num_levels_ > 3) {
          ASSERT_LE(preallocation_size, options.target_file_size_base * 1.1);
        }
      });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

4244 4245 4246
  Random rnd(301);
  int key_idx = 0;

4247
  filter->expect_full_compaction_.store(true);
4248 4249 4250
  // Stage 1:
  //   Generate a set of files at level 0, but don't trigger level-0
  //   compaction.
4251
  for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
4252
       num++) {
4253
    // Write 110KB (11 values, each 10K)
4254
    for (int i = 0; i < 12; i++) {
4255
      ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000)));
4256 4257
      key_idx++;
    }
4258
    dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
S
sdong 已提交
4259
    ASSERT_EQ(NumSortedRuns(1), num + 1);
4260 4261 4262 4263
  }

  // Generate one more file at level-0, which should trigger level-0
  // compaction.
4264
  for (int i = 0; i < 11; i++) {
4265
    ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000)));
4266 4267 4268 4269 4270 4271
    key_idx++;
  }
  dbfull()->TEST_WaitForCompact();
  // Suppose each file flushed from mem table has size 1. Now we compact
  // (level0_file_num_compaction_trigger+1)=4 files and should have a big
  // file of size 4.
S
sdong 已提交
4272
  ASSERT_EQ(NumSortedRuns(1), 1);
4273 4274 4275 4276 4277 4278 4279 4280

  // Stage 2:
  //   Now we have one file at level 0, with size 4. We also have some data in
  //   mem table. Let's continue generating new files at level 0, but don't
  //   trigger level-0 compaction.
  //   First, clean up memtable before inserting new data. This will generate
  //   a level-0 file, with size around 0.4 (according to previously written
  //   data amount).
4281
  filter->expect_full_compaction_.store(false);
4282 4283
  ASSERT_OK(Flush(1));
  for (int num = 0; num < options.level0_file_num_compaction_trigger - 3;
4284
       num++) {
4285 4286
    // Write 110KB (11 values, each 10K)
    for (int i = 0; i < 11; i++) {
4287
      ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000)));
4288 4289
      key_idx++;
    }
4290
    dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
S
sdong 已提交
4291
    ASSERT_EQ(NumSortedRuns(1), num + 3);
4292 4293 4294 4295
  }

  // Generate one more file at level-0, which should trigger level-0
  // compaction.
4296
  for (int i = 0; i < 11; i++) {
4297
    ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000)));
4298 4299 4300 4301
    key_idx++;
  }
  dbfull()->TEST_WaitForCompact();
  // Before compaction, we have 4 files at level 0, with size 4, 0.4, 1, 1.
4302
  // After compaction, we should have 2 files, with size 4, 2.4.
S
sdong 已提交
4303
  ASSERT_EQ(NumSortedRuns(1), 2);
4304 4305 4306 4307

  // Stage 3:
  //   Now we have 2 files at level 0, with size 4 and 2.4. Continue
  //   generating new files at level 0.
4308
  for (int num = 0; num < options.level0_file_num_compaction_trigger - 3;
4309
       num++) {
4310 4311
    // Write 110KB (11 values, each 10K)
    for (int i = 0; i < 11; i++) {
4312
      ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000)));
4313 4314
      key_idx++;
    }
4315
    dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
S
sdong 已提交
4316
    ASSERT_EQ(NumSortedRuns(1), num + 3);
4317 4318 4319 4320 4321
  }

  // Generate one more file at level-0, which should trigger level-0
  // compaction.
  for (int i = 0; i < 12; i++) {
4322
    ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000)));
4323 4324 4325 4326
    key_idx++;
  }
  dbfull()->TEST_WaitForCompact();
  // Before compaction, we have 4 files at level 0, with size 4, 2.4, 1, 1.
4327
  // After compaction, we should have 3 files, with size 4, 2.4, 2.
S
sdong 已提交
4328
  ASSERT_EQ(NumSortedRuns(1), 3);
4329 4330 4331 4332

  // Stage 4:
  //   Now we have 3 files at level 0, with size 4, 2.4, 2. Let's generate a
  //   new file of size 1.
4333
  for (int i = 0; i < 11; i++) {
4334
    ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000)));
4335 4336 4337 4338
    key_idx++;
  }
  dbfull()->TEST_WaitForCompact();
  // Level-0 compaction is triggered, but no file will be picked up.
S
sdong 已提交
4339
  ASSERT_EQ(NumSortedRuns(1), 4);
4340 4341 4342 4343

  // Stage 5:
  //   Now we have 4 files at level 0, with size 4, 2.4, 2, 1. Let's generate
  //   a new file of size 1.
4344
  filter->expect_full_compaction_.store(true);
4345
  for (int i = 0; i < 11; i++) {
4346
    ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000)));
4347 4348 4349 4350
    key_idx++;
  }
  dbfull()->TEST_WaitForCompact();
  // All files at level 0 will be compacted into a single one.
S
sdong 已提交
4351
  ASSERT_EQ(NumSortedRuns(1), 1);
4352 4353

  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
4354 4355
}

S
sdong 已提交
4356
TEST_P(DBTestUniversalCompaction, UniversalCompactionSizeAmplification) {
4357
  Options options;
4358
  options.compaction_style = kCompactionStyleUniversal;
S
sdong 已提交
4359 4360 4361
  options.num_levels = num_levels_;
  options.write_buffer_size = 100 << 10;     // 100KB
  options.target_file_size_base = 32 << 10;  // 32KB
4362
  options.level0_file_num_compaction_trigger = 3;
4363
  options = CurrentOptions(options);
S
sdong 已提交
4364
  DestroyAndReopen(options);
L
Lei Jin 已提交
4365
  CreateAndReopenWithCF({"pikachu"}, options);
4366 4367

  // Trigger compaction if size amplification exceeds 110%
4368
  options.compaction_options_universal.max_size_amplification_percent = 110;
4369
  options = CurrentOptions(options);
L
Lei Jin 已提交
4370
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
4371 4372 4373 4374 4375

  Random rnd(301);
  int key_idx = 0;

  //   Generate two files in Level 0. Both files are approx the same size.
4376
  for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
4377
       num++) {
4378 4379
    // Write 110KB (11 values, each 10K)
    for (int i = 0; i < 11; i++) {
4380
      ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000)));
4381 4382
      key_idx++;
    }
4383
    dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
S
sdong 已提交
4384
    ASSERT_EQ(NumSortedRuns(1), num + 1);
4385
  }
S
sdong 已提交
4386
  ASSERT_EQ(NumSortedRuns(1), 2);
4387 4388 4389 4390

  // Flush whatever is remaining in memtable. This is typically
  // small, which should not trigger size ratio based compaction
  // but will instead trigger size amplification.
4391
  ASSERT_OK(Flush(1));
4392

4393 4394
  dbfull()->TEST_WaitForCompact();

4395
  // Verify that size amplification did occur
S
sdong 已提交
4396
  ASSERT_EQ(NumSortedRuns(1), 1);
4397 4398
}

S
sdong 已提交
4399 4400 4401 4402
class DBTestUniversalCompactionMultiLevels
    : public DBTestUniversalCompactionBase {};

TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionMultiLevels) {
4403
  Options options;
4404
  options.compaction_style = kCompactionStyleUniversal;
S
sdong 已提交
4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455
  options.num_levels = num_levels_;
  options.write_buffer_size = 100 << 10;  // 100KB
  options.level0_file_num_compaction_trigger = 8;
  options.max_background_compactions = 3;
  options.target_file_size_base = 32 * 1024;
  options = CurrentOptions(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Trigger compaction if size amplification exceeds 110%
  options.compaction_options_universal.max_size_amplification_percent = 110;
  options = CurrentOptions(options);
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  Random rnd(301);
  int num_keys = 100000;
  for (int i = 0; i < num_keys * 2; i++) {
    ASSERT_OK(Put(1, Key(i % num_keys), Key(i)));
  }

  dbfull()->TEST_WaitForCompact();

  for (int i = num_keys; i < num_keys * 2; i++) {
    ASSERT_EQ(Get(1, Key(i % num_keys)), Key(i));
  }
}

INSTANTIATE_TEST_CASE_P(DBTestUniversalCompactionMultiLevels,
                        DBTestUniversalCompactionMultiLevels,
                        ::testing::Values(3, 20));

class DBTestUniversalCompactionParallel : public DBTestUniversalCompactionBase {
};

TEST_P(DBTestUniversalCompactionParallel, UniversalCompactionParallel) {
  Options options;
  options.compaction_style = kCompactionStyleUniversal;
  options.num_levels = num_levels_;
  options.write_buffer_size = 1 << 10;  // 1KB
  options.level0_file_num_compaction_trigger = 3;
  options.max_background_compactions = 3;
  options.max_background_flushes = 3;
  options.target_file_size_base = 1 * 1024;
  options.compaction_options_universal.max_size_amplification_percent = 110;
  options = CurrentOptions(options);
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Delay every compaction so multiple compactions will happen.
  std::atomic<int> num_compactions_running(0);
  std::atomic<bool> has_parallel(false);
  rocksdb::SyncPoint::GetInstance()->SetCallBack("CompactionJob::Run():Start",
4456
                                                 [&](void* arg) {
S
sdong 已提交
4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470
    if (num_compactions_running.fetch_add(1) > 0) {
      has_parallel.store(true);
      return;
    }
    for (int nwait = 0; nwait < 20000; nwait++) {
      if (has_parallel.load() || num_compactions_running.load() > 1) {
        has_parallel.store(true);
        break;
      }
      env_->SleepForMicroseconds(1000);
    }
  });
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run():End",
4471
      [&](void* arg) { num_compactions_running.fetch_add(-1); });
S
sdong 已提交
4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  options = CurrentOptions(options);
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  Random rnd(301);
  int num_keys = 30000;
  for (int i = 0; i < num_keys * 2; i++) {
    ASSERT_OK(Put(1, Key(i % num_keys), Key(i)));
  }
  dbfull()->TEST_WaitForCompact();

  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  ASSERT_EQ(num_compactions_running.load(), 0);
  ASSERT_TRUE(has_parallel.load());

  for (int i = num_keys; i < num_keys * 2; i++) {
    ASSERT_EQ(Get(1, Key(i % num_keys)), Key(i));
  }

  // Reopen and check.
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  for (int i = num_keys; i < num_keys * 2; i++) {
    ASSERT_EQ(Get(1, Key(i % num_keys)), Key(i));
  }
}

INSTANTIATE_TEST_CASE_P(DBTestUniversalCompactionParallel,
                        DBTestUniversalCompactionParallel,
                        ::testing::Values(1, 10));

TEST_P(DBTestUniversalCompaction, UniversalCompactionOptions) {
  Options options;
  options.compaction_style = kCompactionStyleUniversal;
  options.write_buffer_size = 100 << 10;     // 100KB
  options.target_file_size_base = 32 << 10;  // 32KB
4508
  options.level0_file_num_compaction_trigger = 4;
S
sdong 已提交
4509
  options.num_levels = num_levels_;
4510
  options.compaction_options_universal.compression_size_percent = -1;
4511
  options = CurrentOptions(options);
S
sdong 已提交
4512
  DestroyAndReopen(options);
L
Lei Jin 已提交
4513
  CreateAndReopenWithCF({"pikachu"}, options);
4514 4515 4516 4517

  Random rnd(301);
  int key_idx = 0;

4518
  for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
4519 4520
    // Write 110KB (11 values, each 10K)
    for (int i = 0; i < 11; i++) {
4521
      ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000)));
4522 4523
      key_idx++;
    }
4524
    dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
4525 4526

    if (num < options.level0_file_num_compaction_trigger - 1) {
S
sdong 已提交
4527
      ASSERT_EQ(NumSortedRuns(1), num + 1);
4528 4529 4530 4531
    }
  }

  dbfull()->TEST_WaitForCompact();
S
sdong 已提交
4532
  ASSERT_EQ(NumSortedRuns(1), 1);
4533 4534
}

S
sdong 已提交
4535
TEST_P(DBTestUniversalCompaction, UniversalCompactionStopStyleSimilarSize) {
4536 4537
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleUniversal;
S
sdong 已提交
4538 4539
  options.write_buffer_size = 100 << 10;     // 100KB
  options.target_file_size_base = 32 << 10;  // 32KB
4540 4541 4542
  // trigger compaction if there are >= 4 files
  options.level0_file_num_compaction_trigger = 4;
  options.compaction_options_universal.size_ratio = 10;
S
sdong 已提交
4543 4544 4545 4546
  options.compaction_options_universal.stop_style =
      kCompactionStopStyleSimilarSize;
  options.num_levels = num_levels_;
  DestroyAndReopen(options);
4547 4548 4549 4550 4551 4552 4553

  Random rnd(301);
  int key_idx = 0;

  // Stage 1:
  //   Generate a set of files at level 0, but don't trigger level-0
  //   compaction.
S
sdong 已提交
4554
  for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
4555
       num++) {
4556 4557
    // Write 110KB (11 values, each 10K)
    for (int i = 0; i < 11; i++) {
4558 4559 4560 4561
      ASSERT_OK(Put(Key(key_idx), RandomString(&rnd, 10000)));
      key_idx++;
    }
    dbfull()->TEST_WaitForFlushMemTable();
S
sdong 已提交
4562
    ASSERT_EQ(NumSortedRuns(), num + 1);
4563 4564 4565 4566
  }

  // Generate one more file at level-0, which should trigger level-0
  // compaction.
4567
  for (int i = 0; i < 11; i++) {
4568 4569 4570 4571 4572 4573 4574
    ASSERT_OK(Put(Key(key_idx), RandomString(&rnd, 10000)));
    key_idx++;
  }
  dbfull()->TEST_WaitForCompact();
  // Suppose each file flushed from mem table has size 1. Now we compact
  // (level0_file_num_compaction_trigger+1)=4 files and should have a big
  // file of size 4.
S
sdong 已提交
4575
  ASSERT_EQ(NumSortedRuns(), 1);
4576 4577 4578 4579 4580 4581 4582 4583 4584

  // Stage 2:
  //   Now we have one file at level 0, with size 4. We also have some data in
  //   mem table. Let's continue generating new files at level 0, but don't
  //   trigger level-0 compaction.
  //   First, clean up memtable before inserting new data. This will generate
  //   a level-0 file, with size around 0.4 (according to previously written
  //   data amount).
  dbfull()->Flush(FlushOptions());
S
sdong 已提交
4585
  for (int num = 0; num < options.level0_file_num_compaction_trigger - 3;
4586
       num++) {
4587 4588
    // Write 110KB (11 values, each 10K)
    for (int i = 0; i < 11; i++) {
4589 4590 4591 4592
      ASSERT_OK(Put(Key(key_idx), RandomString(&rnd, 10000)));
      key_idx++;
    }
    dbfull()->TEST_WaitForFlushMemTable();
S
sdong 已提交
4593
    ASSERT_EQ(NumSortedRuns(), num + 3);
4594 4595 4596 4597
  }

  // Generate one more file at level-0, which should trigger level-0
  // compaction.
4598
  for (int i = 0; i < 11; i++) {
4599 4600 4601 4602 4603 4604
    ASSERT_OK(Put(Key(key_idx), RandomString(&rnd, 10000)));
    key_idx++;
  }
  dbfull()->TEST_WaitForCompact();
  // Before compaction, we have 4 files at level 0, with size 4, 0.4, 1, 1.
  // After compaction, we should have 3 files, with size 4, 0.4, 2.
S
sdong 已提交
4605
  ASSERT_EQ(NumSortedRuns(), 3);
4606 4607 4608
  // Stage 3:
  //   Now we have 3 files at level 0, with size 4, 0.4, 2. Generate one
  //   more file at level-0, which should trigger level-0 compaction.
4609
  for (int i = 0; i < 11; i++) {
4610 4611 4612 4613 4614
    ASSERT_OK(Put(Key(key_idx), RandomString(&rnd, 10000)));
    key_idx++;
  }
  dbfull()->TEST_WaitForCompact();
  // Level-0 compaction is triggered, but no file will be picked up.
S
sdong 已提交
4615
  ASSERT_EQ(NumSortedRuns(), 4);
4616 4617
}

I
Igor Sugak 已提交
4618
TEST_F(DBTest, CompressedCache) {
I
Igor Canadi 已提交
4619
  if (!Snappy_Supported()) {
4620 4621
    return;
  }
4622 4623 4624 4625 4626 4627
  int num_iter = 80;

  // Run this test three iterations.
  // Iteration 1: only a uncompressed block cache
  // Iteration 2: only a compressed block cache
  // Iteration 3: both block cache and compressed cache
I
Igor Canadi 已提交
4628 4629 4630
  // Iteration 4: both block cache and compressed cache, but DB is not
  // compressed
  for (int iter = 0; iter < 4; iter++) {
4631
    Options options;
4632 4633
    options.write_buffer_size = 64*1024;        // small write buffer
    options.statistics = rocksdb::CreateDBStatistics();
4634
    options = CurrentOptions(options);
4635

4636
    BlockBasedTableOptions table_options;
4637 4638 4639
    switch (iter) {
      case 0:
        // only uncompressed block cache
4640 4641 4642
        table_options.block_cache = NewLRUCache(8*1024);
        table_options.block_cache_compressed = nullptr;
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
4643 4644 4645
        break;
      case 1:
        // no block cache, only compressed cache
4646 4647 4648 4649
        table_options.no_block_cache = true;
        table_options.block_cache = nullptr;
        table_options.block_cache_compressed = NewLRUCache(8*1024);
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
4650 4651 4652
        break;
      case 2:
        // both compressed and uncompressed block cache
4653 4654 4655
        table_options.block_cache = NewLRUCache(1024);
        table_options.block_cache_compressed = NewLRUCache(8*1024);
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
4656
        break;
I
Igor Canadi 已提交
4657 4658 4659
      case 3:
        // both block cache and compressed cache, but DB is not compressed
        // also, make block cache sizes bigger, to trigger block cache hits
4660 4661 4662
        table_options.block_cache = NewLRUCache(1024 * 1024);
        table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024);
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
I
Igor Canadi 已提交
4663 4664
        options.compression = kNoCompression;
        break;
4665 4666 4667
      default:
        ASSERT_TRUE(false);
    }
L
Lei Jin 已提交
4668
    CreateAndReopenWithCF({"pikachu"}, options);
4669 4670 4671
    // default column family doesn't have block cache
    Options no_block_cache_opts;
    no_block_cache_opts.statistics = options.statistics;
4672
    no_block_cache_opts = CurrentOptions(no_block_cache_opts);
4673 4674 4675 4676
    BlockBasedTableOptions table_options_no_bc;
    table_options_no_bc.no_block_cache = true;
    no_block_cache_opts.table_factory.reset(
        NewBlockBasedTableFactory(table_options_no_bc));
4677
    ReopenWithColumnFamilies({"default", "pikachu"},
L
Lei Jin 已提交
4678
        std::vector<Options>({no_block_cache_opts, options}));
4679 4680 4681 4682

    Random rnd(301);

    // Write 8MB (80 values, each 100K)
4683
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
4684 4685 4686 4687 4688 4689 4690
    std::vector<std::string> values;
    std::string str;
    for (int i = 0; i < num_iter; i++) {
      if (i % 4 == 0) {        // high compression ratio
        str = RandomString(&rnd, 1000);
      }
      values.push_back(str);
4691
      ASSERT_OK(Put(1, Key(i), values[i]));
4692 4693 4694
    }

    // flush all data from memtable so that reads are from block cache
4695
    ASSERT_OK(Flush(1));
4696 4697

    for (int i = 0; i < num_iter; i++) {
4698
      ASSERT_EQ(Get(1, Key(i)), values[i]);
4699 4700 4701 4702 4703 4704
    }

    // check that we triggered the appropriate code paths in the cache
    switch (iter) {
      case 0:
        // only uncompressed block cache
I
Igor Canadi 已提交
4705 4706
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
4707 4708 4709
        break;
      case 1:
        // no block cache, only compressed cache
I
Igor Canadi 已提交
4710 4711
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
4712 4713 4714
        break;
      case 2:
        // both compressed and uncompressed block cache
I
Igor Canadi 已提交
4715 4716
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
4717
        break;
I
Igor Canadi 已提交
4718 4719 4720 4721 4722 4723 4724 4725 4726
      case 3:
        // both compressed and uncompressed block cache
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_HIT), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        // compressed doesn't have any hits since blocks are not compressed on
        // storage
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT), 0);
        break;
4727 4728 4729
      default:
        ASSERT_TRUE(false);
    }
4730 4731

    options.create_if_missing = true;
L
Lei Jin 已提交
4732
    DestroyAndReopen(options);
4733 4734 4735
  }
}

K
kailiu 已提交
4736 4737 4738 4739 4740 4741
static std::string CompressibleString(Random* rnd, int len) {
  std::string r;
  test::CompressibleString(rnd, 0.8, len, &r);
  return r;
}

S
sdong 已提交
4742
TEST_P(DBTestUniversalCompaction, UniversalCompactionCompressRatio1) {
I
Igor Canadi 已提交
4743
  if (!Snappy_Supported()) {
4744 4745
    return;
  }
S
sdong 已提交
4746

4747
  Options options;
4748
  options.compaction_style = kCompactionStyleUniversal;
S
sdong 已提交
4749 4750
  options.write_buffer_size = 100 << 10;     // 100KB
  options.target_file_size_base = 32 << 10;  // 32KB
4751
  options.level0_file_num_compaction_trigger = 2;
S
sdong 已提交
4752
  options.num_levels = num_levels_;
4753
  options.compaction_options_universal.compression_size_percent = 70;
4754
  options = CurrentOptions(options);
S
sdong 已提交
4755
  DestroyAndReopen(options);
4756 4757 4758 4759 4760 4761

  Random rnd(301);
  int key_idx = 0;

  // The first compaction (2) is compressed.
  for (int num = 0; num < 2; num++) {
4762 4763
    // Write 110KB (11 values, each 10K)
    for (int i = 0; i < 11; i++) {
4764 4765 4766 4767 4768 4769
      ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000)));
      key_idx++;
    }
    dbfull()->TEST_WaitForFlushMemTable();
    dbfull()->TEST_WaitForCompact();
  }
S
sdong 已提交
4770
  ASSERT_LT(TotalSize(), 110000U * 2 * 0.9);
4771 4772 4773

  // The second compaction (4) is compressed
  for (int num = 0; num < 2; num++) {
4774 4775
    // Write 110KB (11 values, each 10K)
    for (int i = 0; i < 11; i++) {
4776 4777 4778 4779 4780 4781
      ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000)));
      key_idx++;
    }
    dbfull()->TEST_WaitForFlushMemTable();
    dbfull()->TEST_WaitForCompact();
  }
S
sdong 已提交
4782
  ASSERT_LT(TotalSize(), 110000 * 4 * 0.9);
4783 4784 4785 4786

  // The third compaction (2 4) is compressed since this time it is
  // (1 1 3.2) and 3.2/5.2 doesn't reach ratio.
  for (int num = 0; num < 2; num++) {
4787 4788
    // Write 110KB (11 values, each 10K)
    for (int i = 0; i < 11; i++) {
4789 4790 4791 4792 4793 4794
      ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000)));
      key_idx++;
    }
    dbfull()->TEST_WaitForFlushMemTable();
    dbfull()->TEST_WaitForCompact();
  }
S
sdong 已提交
4795
  ASSERT_LT(TotalSize(), 110000 * 6 * 0.9);
4796 4797 4798 4799

  // When we start for the compaction up to (2 4 8), the latest
  // compressed is not compressed.
  for (int num = 0; num < 8; num++) {
4800 4801
    // Write 110KB (11 values, each 10K)
    for (int i = 0; i < 11; i++) {
4802 4803 4804 4805 4806 4807
      ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000)));
      key_idx++;
    }
    dbfull()->TEST_WaitForFlushMemTable();
    dbfull()->TEST_WaitForCompact();
  }
S
sdong 已提交
4808
  ASSERT_GT(TotalSize(), 110000 * 11 * 0.8 + 110000 * 2);
4809 4810
}

S
sdong 已提交
4811
TEST_P(DBTestUniversalCompaction, UniversalCompactionCompressRatio2) {
I
Igor Canadi 已提交
4812
  if (!Snappy_Supported()) {
4813 4814
    return;
  }
4815
  Options options;
4816
  options.compaction_style = kCompactionStyleUniversal;
S
sdong 已提交
4817 4818
  options.write_buffer_size = 100 << 10;     // 100KB
  options.target_file_size_base = 32 << 10;  // 32KB
4819
  options.level0_file_num_compaction_trigger = 2;
S
sdong 已提交
4820
  options.num_levels = num_levels_;
4821
  options.compaction_options_universal.compression_size_percent = 95;
4822
  options = CurrentOptions(options);
S
sdong 已提交
4823
  DestroyAndReopen(options);
4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838

  Random rnd(301);
  int key_idx = 0;

  // When we start for the compaction up to (2 4 8), the latest
  // compressed is compressed given the size ratio to compress.
  for (int num = 0; num < 14; num++) {
    // Write 120KB (12 values, each 10K)
    for (int i = 0; i < 12; i++) {
      ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000)));
      key_idx++;
    }
    dbfull()->TEST_WaitForFlushMemTable();
    dbfull()->TEST_WaitForCompact();
  }
S
sdong 已提交
4839
  ASSERT_LT(TotalSize(), 120000U * 12 * 0.8 + 120000 * 2);
4840
}
4841

S
sdong 已提交
4842 4843 4844
INSTANTIATE_TEST_CASE_P(UniversalCompactionNumLevels, DBTestUniversalCompaction,
                        ::testing::Values(1, 3, 5));

I
Igor Sugak 已提交
4845
TEST_F(DBTest, FailMoreDbPaths) {
4846
  Options options = CurrentOptions();
4847 4848 4849 4850 4851
  options.db_paths.emplace_back(dbname_, 10000000);
  options.db_paths.emplace_back(dbname_ + "_2", 1000000);
  options.db_paths.emplace_back(dbname_ + "_3", 1000000);
  options.db_paths.emplace_back(dbname_ + "_4", 1000000);
  options.db_paths.emplace_back(dbname_ + "_5", 1000000);
L
Lei Jin 已提交
4852
  ASSERT_TRUE(TryReopen(options).IsNotSupported());
4853
}
4854

I
Igor Sugak 已提交
4855
TEST_F(DBTest, UniversalCompactionSecondPathRatio) {
I
Igor Canadi 已提交
4856
  if (!Snappy_Supported()) {
4857 4858
    return;
  }
4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874
  Options options;
  options.db_paths.emplace_back(dbname_, 500 * 1024);
  options.db_paths.emplace_back(dbname_ + "_2", 1024 * 1024 * 1024);
  options.compaction_style = kCompactionStyleUniversal;
  options.write_buffer_size = 100 << 10;  // 100KB
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 1;
  options = CurrentOptions(options);

  std::vector<std::string> filenames;
  env_->GetChildren(options.db_paths[1].path, &filenames);
  // Delete archival files.
  for (size_t i = 0; i < filenames.size(); ++i) {
    env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]);
  }
  env_->DeleteDir(options.db_paths[1].path);
L
Lei Jin 已提交
4875
  Reopen(options);
4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940

  Random rnd(301);
  int key_idx = 0;

  // First three 110KB files are not going to second path.
  // After that, (100K, 200K)
  for (int num = 0; num < 3; num++) {
    GenerateNewFile(&rnd, &key_idx);
  }

  // Another 110KB triggers a compaction to 400K file to second path
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));

  // (1, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1,1,4) -> (2, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 2, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(2, GetSstFileCount(dbname_));

  // (1, 1, 2, 4) -> (8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  // (1, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 1, 8) -> (2, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 2, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(2, GetSstFileCount(dbname_));

  // (1, 1, 2, 8) -> (4, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  // (1, 4, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 10000);
  }

L
Lei Jin 已提交
4941
  Reopen(options);
4942 4943 4944 4945 4946 4947 4948

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 10000);
  }

L
Lei Jin 已提交
4949
  Destroy(options);
4950 4951
}

I
Igor Sugak 已提交
4952
TEST_F(DBTest, LevelCompactionThirdPath) {
4953
  Options options = CurrentOptions();
4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064
  options.db_paths.emplace_back(dbname_, 500 * 1024);
  options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024);
  options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024);
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 100 << 10;  // 100KB
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 4;
  options.max_bytes_for_level_base = 400 * 1024;
  //  options = CurrentOptions(options);

  std::vector<std::string> filenames;
  env_->GetChildren(options.db_paths[1].path, &filenames);
  // Delete archival files.
  for (size_t i = 0; i < filenames.size(); ++i) {
    env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]);
  }
  env_->DeleteDir(options.db_paths[1].path);
  Reopen(options);

  Random rnd(301);
  int key_idx = 0;

  // First three 110KB files are not going to second path.
  // After that, (100K, 200K)
  for (int num = 0; num < 3; num++) {
    GenerateNewFile(&rnd, &key_idx);
  }

  // Another 110KB triggers a compaction to 400K file to fill up first path
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(3, GetSstFileCount(options.db_paths[1].path));

  // (1, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4", FilesPerLevel(0));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 1)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,1", FilesPerLevel(0));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 2)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,2", FilesPerLevel(0));
  ASSERT_EQ(2, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 3)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,3", FilesPerLevel(0));
  ASSERT_EQ(3, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,4", FilesPerLevel(0));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 5)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,5", FilesPerLevel(0));
  ASSERT_EQ(5, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 6)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,6", FilesPerLevel(0));
  ASSERT_EQ(6, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 7)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,7", FilesPerLevel(0));
  ASSERT_EQ(7, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,8", FilesPerLevel(0));
  ASSERT_EQ(8, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 10000);
  }

  Reopen(options);

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 10000);
  }

  Destroy(options);
}

I
Igor Sugak 已提交
5065
TEST_F(DBTest, LevelCompactionPathUse) {
5066
  Options options = CurrentOptions();
5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178
  options.db_paths.emplace_back(dbname_, 500 * 1024);
  options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024);
  options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024);
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 100 << 10;  // 100KB
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 4;
  options.max_bytes_for_level_base = 400 * 1024;
  //  options = CurrentOptions(options);

  std::vector<std::string> filenames;
  env_->GetChildren(options.db_paths[1].path, &filenames);
  // Delete archival files.
  for (size_t i = 0; i < filenames.size(); ++i) {
    env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]);
  }
  env_->DeleteDir(options.db_paths[1].path);
  Reopen(options);

  Random rnd(301);
  int key_idx = 0;

  // Always gets compacted into 1 Level1 file,
  // 0/1 Level 0 file
  for (int num = 0; num < 3; num++) {
    key_idx = 0;
    GenerateNewFile(&rnd, &key_idx);
  }

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,1", FilesPerLevel(0));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("0,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("0,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("0,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("0,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 10000);
  }

  Reopen(options);

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 10000);
  }

  Destroy(options);
}

I
Igor Sugak 已提交
5179
TEST_F(DBTest, UniversalCompactionFourPaths) {
5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197
  Options options;
  options.db_paths.emplace_back(dbname_, 300 * 1024);
  options.db_paths.emplace_back(dbname_ + "_2", 300 * 1024);
  options.db_paths.emplace_back(dbname_ + "_3", 500 * 1024);
  options.db_paths.emplace_back(dbname_ + "_4", 1024 * 1024 * 1024);
  options.compaction_style = kCompactionStyleUniversal;
  options.write_buffer_size = 100 << 10;  // 100KB
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 1;
  options = CurrentOptions(options);

  std::vector<std::string> filenames;
  env_->GetChildren(options.db_paths[1].path, &filenames);
  // Delete archival files.
  for (size_t i = 0; i < filenames.size(); ++i) {
    env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]);
  }
  env_->DeleteDir(options.db_paths[1].path);
L
Lei Jin 已提交
5198
  Reopen(options);
5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266

  Random rnd(301);
  int key_idx = 0;

  // First three 110KB files are not going to second path.
  // After that, (100K, 200K)
  for (int num = 0; num < 3; num++) {
    GenerateNewFile(&rnd, &key_idx);
  }

  // Another 110KB triggers a compaction to 400K file to second path
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path));

  // (1, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1,1,4) -> (2, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  // (1, 2, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 1, 2, 4) -> (8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path));

  // (1, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 1, 8) -> (2, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));

  // (1, 2, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 1, 2, 8) -> (4, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path));

  // (1, 4, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 10000);
  }

L
Lei Jin 已提交
5267
  Reopen(options);
5268 5269 5270 5271 5272 5273 5274

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 10000);
  }

L
Lei Jin 已提交
5275
  Destroy(options);
5276
}
5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296

void CheckColumnFamilyMeta(const ColumnFamilyMetaData& cf_meta) {
  uint64_t cf_size = 0;
  uint64_t cf_csize = 0;
  size_t file_count = 0;
  for (auto level_meta : cf_meta.levels) {
    uint64_t level_size = 0;
    uint64_t level_csize = 0;
    file_count += level_meta.files.size();
    for (auto file_meta : level_meta.files) {
      level_size += file_meta.size;
    }
    ASSERT_EQ(level_meta.size, level_size);
    cf_size += level_size;
    cf_csize += level_csize;
  }
  ASSERT_EQ(cf_meta.file_count, file_count);
  ASSERT_EQ(cf_meta.size, cf_size);
}

I
Igor Sugak 已提交
5297
TEST_F(DBTest, ColumnFamilyMetaDataTest) {
5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311
  Options options = CurrentOptions();
  options.create_if_missing = true;
  DestroyAndReopen(options);

  Random rnd(301);
  int key_index = 0;
  ColumnFamilyMetaData cf_meta;
  for (int i = 0; i < 100; ++i) {
    GenerateNewFile(&rnd, &key_index);
    db_->GetColumnFamilyMetaData(&cf_meta);
    CheckColumnFamilyMeta(cf_meta);
  }
}

I
Igor Sugak 已提交
5312
TEST_F(DBTest, ConvertCompactionStyle) {
5313 5314 5315 5316 5317
  Random rnd(301);
  int max_key_level_insert = 200;
  int max_key_universal_insert = 600;

  // Stage 1: generate a db with level compaction
5318
  Options options;
5319 5320 5321 5322 5323 5324 5325
  options.write_buffer_size = 100<<10; //100KB
  options.num_levels = 4;
  options.level0_file_num_compaction_trigger = 3;
  options.max_bytes_for_level_base = 500<<10; // 500KB
  options.max_bytes_for_level_multiplier = 1;
  options.target_file_size_base = 200<<10; // 200KB
  options.target_file_size_multiplier = 1;
5326
  options = CurrentOptions(options);
L
Lei Jin 已提交
5327
  CreateAndReopenWithCF({"pikachu"}, options);
5328 5329 5330

  for (int i = 0; i <= max_key_level_insert; i++) {
    // each value is 10K
5331
    ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000)));
5332
  }
5333
  ASSERT_OK(Flush(1));
5334 5335
  dbfull()->TEST_WaitForCompact();

5336
  ASSERT_GT(TotalTableFiles(1, 4), 1);
5337
  int non_level0_num_files = 0;
5338 5339
  for (int i = 1; i < options.num_levels; i++) {
    non_level0_num_files += NumTableFilesAtLevel(i, 1);
5340 5341 5342 5343 5344 5345
  }
  ASSERT_GT(non_level0_num_files, 0);

  // Stage 2: reopen with universal compaction - should fail
  options = CurrentOptions();
  options.compaction_style = kCompactionStyleUniversal;
S
sdong 已提交
5346
  options.num_levels = 1;
5347
  options = CurrentOptions(options);
L
Lei Jin 已提交
5348
  Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
5349 5350 5351 5352 5353 5354 5355 5356 5357
  ASSERT_TRUE(s.IsInvalidArgument());

  // Stage 3: compact into a single file and move the file to level 0
  options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.target_file_size_base = INT_MAX;
  options.target_file_size_multiplier = 1;
  options.max_bytes_for_level_base = INT_MAX;
  options.max_bytes_for_level_multiplier = 1;
S
sdong 已提交
5358
  options.num_levels = 4;
5359
  options = CurrentOptions(options);
L
Lei Jin 已提交
5360
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
5361

5362 5363
  dbfull()->CompactRange(handles_[1], nullptr, nullptr, true /* reduce level */,
                         0 /* reduce to level 0 */);
5364

5365 5366
  for (int i = 0; i < options.num_levels; i++) {
    int num = NumTableFilesAtLevel(i, 1);
5367 5368 5369 5370 5371 5372 5373 5374 5375 5376
    if (i == 0) {
      ASSERT_EQ(num, 1);
    } else {
      ASSERT_EQ(num, 0);
    }
  }

  // Stage 4: re-open in universal compaction style and do some db operations
  options = CurrentOptions();
  options.compaction_style = kCompactionStyleUniversal;
S
sdong 已提交
5377
  options.num_levels = 4;
5378 5379
  options.write_buffer_size = 100<<10; //100KB
  options.level0_file_num_compaction_trigger = 3;
5380
  options = CurrentOptions(options);
L
Lei Jin 已提交
5381
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
5382

S
sdong 已提交
5383 5384 5385
  options.num_levels = 1;
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

5386
  for (int i = max_key_level_insert / 2; i <= max_key_universal_insert; i++) {
5387
    ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000)));
5388 5389
  }
  dbfull()->Flush(FlushOptions());
5390
  ASSERT_OK(Flush(1));
5391 5392
  dbfull()->TEST_WaitForCompact();

5393 5394
  for (int i = 1; i < options.num_levels; i++) {
    ASSERT_EQ(NumTableFilesAtLevel(i, 1), 0);
5395 5396 5397 5398 5399
  }

  // verify keys inserted in both level compaction style and universal
  // compaction style
  std::string keys_in_db;
5400
  Iterator* iter = dbfull()->NewIterator(ReadOptions(), handles_[1]);
5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    keys_in_db.append(iter->key().ToString());
    keys_in_db.push_back(',');
  }
  delete iter;

  std::string expected_keys;
  for (int i = 0; i <= max_key_universal_insert; i++) {
    expected_keys.append(Key(i));
    expected_keys.push_back(',');
  }

  ASSERT_EQ(keys_in_db, expected_keys);
}

S
sdong 已提交
5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440
TEST_F(DBTest, IncreaseUniversalCompactionNumLevels) {
  std::function<void(int)> verify_func = [&](int num_keys_in_db) {
    std::string keys_in_db;
    Iterator* iter = dbfull()->NewIterator(ReadOptions(), handles_[1]);
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      keys_in_db.append(iter->key().ToString());
      keys_in_db.push_back(',');
    }
    delete iter;

    std::string expected_keys;
    for (int i = 0; i <= num_keys_in_db; i++) {
      expected_keys.append(Key(i));
      expected_keys.push_back(',');
    }

    ASSERT_EQ(keys_in_db, expected_keys);
  };

  Random rnd(301);
  int max_key1 = 200;
  int max_key2 = 600;
  int max_key3 = 800;

  // Stage 1: open a DB with universal compaction, num_levels=1
5441
  Options options = CurrentOptions();
S
sdong 已提交
5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506
  options.compaction_style = kCompactionStyleUniversal;
  options.num_levels = 1;
  options.write_buffer_size = 100 << 10;  // 100KB
  options.level0_file_num_compaction_trigger = 3;
  options = CurrentOptions(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  for (int i = 0; i <= max_key1; i++) {
    // each value is 10K
    ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000)));
  }
  ASSERT_OK(Flush(1));
  dbfull()->TEST_WaitForCompact();

  int non_level0_num_files = 0;
  for (int i = 1; i < options.num_levels; i++) {
    non_level0_num_files += NumTableFilesAtLevel(i, 1);
  }
  ASSERT_EQ(non_level0_num_files, 0);

  // Stage 2: reopen with universal compaction, num_levels=4
  options.compaction_style = kCompactionStyleUniversal;
  options.num_levels = 4;
  options = CurrentOptions(options);
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  verify_func(max_key1);

  // Insert more keys
  for (int i = max_key1 + 1; i <= max_key2; i++) {
    // each value is 10K
    ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000)));
  }
  ASSERT_OK(Flush(1));
  dbfull()->TEST_WaitForCompact();

  verify_func(max_key2);
  // Compaction to non-L0 has happened.
  ASSERT_GT(NumTableFilesAtLevel(options.num_levels - 1, 1), 0);

  // Stage 3: Revert it back to one level and revert to num_levels=1.
  options.num_levels = 4;
  options.target_file_size_base = INT_MAX;
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  // Compact all to level 0
  dbfull()->CompactRange(handles_[1], nullptr, nullptr, true /* reduce level */,
                         0 /* reduce to level 0 */);
  // Need to restart it once to remove higher level records in manifest.
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  // Final reopen
  options.compaction_style = kCompactionStyleUniversal;
  options.num_levels = 1;
  options = CurrentOptions(options);
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  // Insert more keys
  for (int i = max_key2 + 1; i <= max_key3; i++) {
    // each value is 10K
    ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000)));
  }
  ASSERT_OK(Flush(1));
  dbfull()->TEST_WaitForCompact();
  verify_func(max_key3);
}

I
Igor Canadi 已提交
5507
namespace {
5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520
void MinLevelHelper(DBTest* self, Options& options) {
  Random rnd(301);

  for (int num = 0;
    num < options.level0_file_num_compaction_trigger - 1;
    num++)
  {
    std::vector<std::string> values;
    // Write 120KB (12 values, each 10K)
    for (int i = 0; i < 12; i++) {
      values.push_back(RandomString(&rnd, 10000));
      ASSERT_OK(self->Put(Key(i), values[i]));
    }
5521
    self->dbfull()->TEST_WaitForFlushMemTable();
5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536
    ASSERT_EQ(self->NumTableFilesAtLevel(0), num + 1);
  }

  //generate one more file in level-0, and should trigger level-0 compaction
  std::vector<std::string> values;
  for (int i = 0; i < 12; i++) {
    values.push_back(RandomString(&rnd, 10000));
    ASSERT_OK(self->Put(Key(i), values[i]));
  }
  self->dbfull()->TEST_WaitForCompact();

  ASSERT_EQ(self->NumTableFilesAtLevel(0), 0);
  ASSERT_EQ(self->NumTableFilesAtLevel(1), 1);
}

A
amayank 已提交
5537 5538
// returns false if the calling-Test should be skipped
bool MinLevelToCompress(CompressionType& type, Options& options, int wbits,
5539
                        int lev, int strategy) {
5540
  fprintf(stderr, "Test with compression options : window_bits = %d, level =  %d, strategy = %d}\n", wbits, lev, strategy);
5541 5542 5543 5544
  options.write_buffer_size = 100<<10; //100KB
  options.num_levels = 3;
  options.max_mem_compaction_level = 0;
  options.level0_file_num_compaction_trigger = 3;
5545
  options.create_if_missing = true;
5546

I
Igor Canadi 已提交
5547
  if (Snappy_Supported()) {
5548 5549
    type = kSnappyCompression;
    fprintf(stderr, "using snappy\n");
I
Igor Canadi 已提交
5550
  } else if (Zlib_Supported()) {
5551 5552
    type = kZlibCompression;
    fprintf(stderr, "using zlib\n");
I
Igor Canadi 已提交
5553
  } else if (BZip2_Supported()) {
5554 5555
    type = kBZip2Compression;
    fprintf(stderr, "using bzip2\n");
I
Igor Canadi 已提交
5556
  } else if (LZ4_Supported()) {
A
Albert Strasheim 已提交
5557 5558
    type = kLZ4Compression;
    fprintf(stderr, "using lz4\n");
5559 5560
  } else {
    fprintf(stderr, "skipping test, compression disabled\n");
A
amayank 已提交
5561
    return false;
5562
  }
5563
  options.compression_per_level.resize(options.num_levels);
5564 5565 5566 5567 5568 5569 5570 5571

  // do not compress L0
  for (int i = 0; i < 1; i++) {
    options.compression_per_level[i] = kNoCompression;
  }
  for (int i = 1; i < options.num_levels; i++) {
    options.compression_per_level[i] = type;
  }
A
amayank 已提交
5572
  return true;
5573
}
I
Igor Canadi 已提交
5574
}  // namespace
5575

I
Igor Sugak 已提交
5576
TEST_F(DBTest, MinLevelToCompress1) {
5577
  Options options = CurrentOptions();
5578
  CompressionType type = kSnappyCompression;
A
amayank 已提交
5579 5580 5581
  if (!MinLevelToCompress(type, options, -14, -1, 0)) {
    return;
  }
L
Lei Jin 已提交
5582
  Reopen(options);
5583 5584 5585 5586 5587 5588 5589 5590 5591
  MinLevelHelper(this, options);

  // do not compress L0 and L1
  for (int i = 0; i < 2; i++) {
    options.compression_per_level[i] = kNoCompression;
  }
  for (int i = 2; i < options.num_levels; i++) {
    options.compression_per_level[i] = type;
  }
L
Lei Jin 已提交
5592
  DestroyAndReopen(options);
5593 5594 5595
  MinLevelHelper(this, options);
}

I
Igor Sugak 已提交
5596
TEST_F(DBTest, MinLevelToCompress2) {
5597
  Options options = CurrentOptions();
5598
  CompressionType type = kSnappyCompression;
A
amayank 已提交
5599 5600 5601
  if (!MinLevelToCompress(type, options, 15, -1, 0)) {
    return;
  }
L
Lei Jin 已提交
5602
  Reopen(options);
5603 5604
  MinLevelHelper(this, options);

5605 5606 5607 5608 5609 5610 5611
  // do not compress L0 and L1
  for (int i = 0; i < 2; i++) {
    options.compression_per_level[i] = kNoCompression;
  }
  for (int i = 2; i < options.num_levels; i++) {
    options.compression_per_level[i] = type;
  }
L
Lei Jin 已提交
5612
  DestroyAndReopen(options);
5613 5614
  MinLevelHelper(this, options);
}
5615

I
Igor Sugak 已提交
5616
TEST_F(DBTest, RepeatedWritesToSameKey) {
5617
  do {
5618
    Options options;
5619 5620
    options.env = env_;
    options.write_buffer_size = 100000;  // Small write buffer
5621
    options = CurrentOptions(options);
L
Lei Jin 已提交
5622
    CreateAndReopenWithCF({"pikachu"}, options);
5623

5624 5625
    // We must have at most one file per level except for level-0,
    // which may have up to kL0_StopWritesTrigger files.
5626 5627
    const int kMaxFiles =
        options.num_levels + options.level0_stop_writes_trigger;
5628

5629
    Random rnd(301);
5630 5631
    std::string value =
        RandomString(&rnd, static_cast<int>(2 * options.write_buffer_size));
5632
    for (int i = 0; i < 5 * kMaxFiles; i++) {
5633 5634
      ASSERT_OK(Put(1, "key", value));
      ASSERT_LE(TotalTableFiles(1), kMaxFiles);
5635 5636
    }
  } while (ChangeCompactOptions());
5637 5638
}

I
Igor Sugak 已提交
5639
TEST_F(DBTest, InPlaceUpdate) {
5640
  do {
5641
    Options options;
5642 5643 5644 5645
    options.create_if_missing = true;
    options.inplace_update_support = true;
    options.env = env_;
    options.write_buffer_size = 100000;
5646
    options = CurrentOptions(options);
L
Lei Jin 已提交
5647
    CreateAndReopenWithCF({"pikachu"}, options);
5648 5649 5650 5651 5652

    // Update key with values of smaller size
    int numValues = 10;
    for (int i = numValues; i > 0; i--) {
      std::string value = DummyString(i, 'a');
5653 5654
      ASSERT_OK(Put(1, "key", value));
      ASSERT_EQ(value, Get(1, "key"));
5655 5656 5657
    }

    // Only 1 instance for that key.
5658
    validateNumberOfEntries(1, 1);
5659 5660 5661 5662

  } while (ChangeCompactOptions());
}

I
Igor Sugak 已提交
5663
TEST_F(DBTest, InPlaceUpdateLargeNewValue) {
5664
  do {
5665
    Options options;
5666 5667 5668 5669
    options.create_if_missing = true;
    options.inplace_update_support = true;
    options.env = env_;
    options.write_buffer_size = 100000;
5670
    options = CurrentOptions(options);
L
Lei Jin 已提交
5671
    CreateAndReopenWithCF({"pikachu"}, options);
5672 5673

    // Update key with values of larger size
5674
    int numValues = 10;
5675 5676
    for (int i = 0; i < numValues; i++) {
      std::string value = DummyString(i, 'a');
5677 5678
      ASSERT_OK(Put(1, "key", value));
      ASSERT_EQ(value, Get(1, "key"));
5679 5680 5681
    }

    // All 10 updates exist in the internal iterator
5682
    validateNumberOfEntries(numValues, 1);
5683

5684 5685 5686
  } while (ChangeCompactOptions());
}

I
Igor Sugak 已提交
5687
TEST_F(DBTest, InPlaceUpdateCallbackSmallerSize) {
5688
  do {
5689
    Options options;
5690 5691 5692 5693 5694 5695
    options.create_if_missing = true;
    options.inplace_update_support = true;

    options.env = env_;
    options.write_buffer_size = 100000;
    options.inplace_callback =
5696
      rocksdb::DBTest::updateInPlaceSmallerSize;
5697
    options = CurrentOptions(options);
L
Lei Jin 已提交
5698
    CreateAndReopenWithCF({"pikachu"}, options);
5699 5700 5701

    // Update key with values of smaller size
    int numValues = 10;
5702 5703
    ASSERT_OK(Put(1, "key", DummyString(numValues, 'a')));
    ASSERT_EQ(DummyString(numValues, 'c'), Get(1, "key"));
5704 5705

    for (int i = numValues; i > 0; i--) {
5706 5707
      ASSERT_OK(Put(1, "key", DummyString(i, 'a')));
      ASSERT_EQ(DummyString(i - 1, 'b'), Get(1, "key"));
5708
    }
5709 5710

    // Only 1 instance for that key.
5711
    validateNumberOfEntries(1, 1);
5712 5713 5714 5715

  } while (ChangeCompactOptions());
}

I
Igor Sugak 已提交
5716
TEST_F(DBTest, InPlaceUpdateCallbackSmallerVarintSize) {
5717
  do {
5718
    Options options;
5719 5720 5721 5722 5723 5724 5725
    options.create_if_missing = true;
    options.inplace_update_support = true;

    options.env = env_;
    options.write_buffer_size = 100000;
    options.inplace_callback =
      rocksdb::DBTest::updateInPlaceSmallerVarintSize;
5726
    options = CurrentOptions(options);
L
Lei Jin 已提交
5727
    CreateAndReopenWithCF({"pikachu"}, options);
5728 5729 5730

    // Update key with values of smaller varint size
    int numValues = 265;
5731 5732
    ASSERT_OK(Put(1, "key", DummyString(numValues, 'a')));
    ASSERT_EQ(DummyString(numValues, 'c'), Get(1, "key"));
5733 5734

    for (int i = numValues; i > 0; i--) {
5735 5736
      ASSERT_OK(Put(1, "key", DummyString(i, 'a')));
      ASSERT_EQ(DummyString(1, 'b'), Get(1, "key"));
5737 5738 5739
    }

    // Only 1 instance for that key.
5740
    validateNumberOfEntries(1, 1);
5741

5742 5743 5744
  } while (ChangeCompactOptions());
}

I
Igor Sugak 已提交
5745
TEST_F(DBTest, InPlaceUpdateCallbackLargeNewValue) {
5746
  do {
5747
    Options options;
5748 5749 5750 5751 5752 5753
    options.create_if_missing = true;
    options.inplace_update_support = true;

    options.env = env_;
    options.write_buffer_size = 100000;
    options.inplace_callback =
5754
      rocksdb::DBTest::updateInPlaceLargerSize;
5755
    options = CurrentOptions(options);
L
Lei Jin 已提交
5756
    CreateAndReopenWithCF({"pikachu"}, options);
5757 5758 5759

    // Update key with values of larger size
    int numValues = 10;
5760
    for (int i = 0; i < numValues; i++) {
5761 5762
      ASSERT_OK(Put(1, "key", DummyString(i, 'a')));
      ASSERT_EQ(DummyString(i, 'c'), Get(1, "key"));
5763 5764 5765
    }

    // No inplace updates. All updates are puts with new seq number
5766
    // All 10 updates exist in the internal iterator
5767
    validateNumberOfEntries(numValues, 1);
5768 5769 5770 5771

  } while (ChangeCompactOptions());
}

I
Igor Sugak 已提交
5772
TEST_F(DBTest, InPlaceUpdateCallbackNoAction) {
5773
  do {
5774
    Options options;
5775 5776 5777 5778 5779 5780 5781
    options.create_if_missing = true;
    options.inplace_update_support = true;

    options.env = env_;
    options.write_buffer_size = 100000;
    options.inplace_callback =
      rocksdb::DBTest::updateInPlaceNoAction;
5782
    options = CurrentOptions(options);
L
Lei Jin 已提交
5783
    CreateAndReopenWithCF({"pikachu"}, options);
5784

5785
    // Callback function requests no actions from db
5786 5787
    ASSERT_OK(Put(1, "key", DummyString(1, 'a')));
    ASSERT_EQ(Get(1, "key"), "NOT_FOUND");
5788 5789 5790 5791

  } while (ChangeCompactOptions());
}

I
Igor Sugak 已提交
5792
TEST_F(DBTest, CompactionFilter) {
5793
  Options options = CurrentOptions();
5794
  options.max_open_files = -1;
5795 5796
  options.num_levels = 3;
  options.max_mem_compaction_level = 0;
5797
  options.compaction_filter_factory = std::make_shared<KeepFilterFactory>();
5798
  options = CurrentOptions(options);
L
Lei Jin 已提交
5799
  CreateAndReopenWithCF({"pikachu"}, options);
5800

5801
  // Write 100K keys, these are written to a few files in L0.
5802
  const std::string value(10, 'x');
5803
  for (int i = 0; i < 100000; i++) {
5804 5805
    char key[100];
    snprintf(key, sizeof(key), "B%010d", i);
5806
    Put(1, key, value);
5807
  }
5808
  ASSERT_OK(Flush(1));
5809 5810 5811 5812 5813

  // Push all files to the highest level L2. Verify that
  // the compaction is each level invokes the filter for
  // all the keys in that level.
  cfilter_count = 0;
5814
  dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
5815 5816
  ASSERT_EQ(cfilter_count, 100000);
  cfilter_count = 0;
5817
  dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
5818 5819
  ASSERT_EQ(cfilter_count, 100000);

5820 5821 5822
  ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
  ASSERT_NE(NumTableFilesAtLevel(2, 1), 0);
5823 5824
  cfilter_count = 0;

5825 5826 5827 5828 5829
  // All the files are in the lowest level.
  // Verify that all but the 100001st record
  // has sequence number zero. The 100001st record
  // is at the tip of this snapshot and cannot
  // be zeroed out.
5830
  // TODO: figure out sequence number squashtoo
5831 5832
  int count = 0;
  int total = 0;
5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847
  Arena arena;
  {
    ScopedArenaIterator iter(
        dbfull()->TEST_NewInternalIterator(&arena, handles_[1]));
    iter->SeekToFirst();
    ASSERT_OK(iter->status());
    while (iter->Valid()) {
      ParsedInternalKey ikey(Slice(), 0, kTypeValue);
      ikey.sequence = -1;
      ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
      total++;
      if (ikey.sequence != 0) {
        count++;
      }
      iter->Next();
5848 5849
    }
  }
5850
  ASSERT_EQ(total, 100000);
5851 5852
  ASSERT_EQ(count, 1);

5853 5854
  // overwrite all the 100K keys once again.
  for (int i = 0; i < 100000; i++) {
5855 5856
    char key[100];
    snprintf(key, sizeof(key), "B%010d", i);
5857
    ASSERT_OK(Put(1, key, value));
5858
  }
5859
  ASSERT_OK(Flush(1));
5860 5861 5862 5863 5864

  // push all files to the highest level L2. This
  // means that all keys should pass at least once
  // via the compaction filter
  cfilter_count = 0;
5865
  dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
5866 5867
  ASSERT_EQ(cfilter_count, 100000);
  cfilter_count = 0;
5868
  dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
5869
  ASSERT_EQ(cfilter_count, 100000);
5870 5871 5872
  ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
  ASSERT_NE(NumTableFilesAtLevel(2, 1), 0);
5873 5874 5875

  // create a new database with the compaction
  // filter in such a way that it deletes all keys
5876
  options.compaction_filter_factory = std::make_shared<DeleteFilterFactory>();
5877
  options.create_if_missing = true;
L
Lei Jin 已提交
5878
  DestroyAndReopen(options);
L
Lei Jin 已提交
5879
  CreateAndReopenWithCF({"pikachu"}, options);
5880 5881

  // write all the keys once again.
5882
  for (int i = 0; i < 100000; i++) {
5883 5884
    char key[100];
    snprintf(key, sizeof(key), "B%010d", i);
5885
    ASSERT_OK(Put(1, key, value));
5886
  }
5887 5888 5889 5890
  ASSERT_OK(Flush(1));
  ASSERT_NE(NumTableFilesAtLevel(0, 1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2, 1), 0);
5891 5892 5893 5894 5895 5896

  // Push all files to the highest level L2. This
  // triggers the compaction filter to delete all keys,
  // verify that at the end of the compaction process,
  // nothing is left.
  cfilter_count = 0;
5897
  dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
5898 5899
  ASSERT_EQ(cfilter_count, 100000);
  cfilter_count = 0;
5900
  dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
5901
  ASSERT_EQ(cfilter_count, 0);
5902 5903
  ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
5904

I
Igor Canadi 已提交
5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915
  {
    // Scan the entire database to ensure that nothing is left
    std::unique_ptr<Iterator> iter(
        db_->NewIterator(ReadOptions(), handles_[1]));
    iter->SeekToFirst();
    count = 0;
    while (iter->Valid()) {
      count++;
      iter->Next();
    }
    ASSERT_EQ(count, 0);
5916 5917 5918
  }

  // The sequence number of the remaining record
5919
  // is not zeroed out even though it is at the
5920
  // level Lmax because this record is at the tip
5921 5922
  // TODO: remove the following or design a different
  // test
5923
  count = 0;
5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936
  {
    ScopedArenaIterator iter(
        dbfull()->TEST_NewInternalIterator(&arena, handles_[1]));
    iter->SeekToFirst();
    ASSERT_OK(iter->status());
    while (iter->Valid()) {
      ParsedInternalKey ikey(Slice(), 0, kTypeValue);
      ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
      ASSERT_NE(ikey.sequence, (unsigned)0);
      count++;
      iter->Next();
    }
    ASSERT_EQ(count, 0);
5937 5938 5939
  }
}

I
Igor Canadi 已提交
5940 5941 5942
// Tests the edge case where compaction does not produce any output -- all
// entries are deleted. The compaction should create bunch of 'DeleteFile'
// entries in VersionEdit, but none of the 'AddFile's.
I
Igor Sugak 已提交
5943
TEST_F(DBTest, CompactionFilterDeletesAll) {
I
Igor Canadi 已提交
5944 5945 5946 5947
  Options options;
  options.compaction_filter_factory = std::make_shared<DeleteFilterFactory>();
  options.disable_auto_compactions = true;
  options.create_if_missing = true;
5948
  options = CurrentOptions(options);
L
Lei Jin 已提交
5949
  DestroyAndReopen(options);
I
Igor Canadi 已提交
5950 5951 5952 5953

  // put some data
  for (int table = 0; table < 4; ++table) {
    for (int i = 0; i < 10 + table; ++i) {
5954
      Put(ToString(table * 100 + i), "val");
I
Igor Canadi 已提交
5955 5956 5957 5958 5959 5960
    }
    Flush();
  }

  // this will produce empty file (delete compaction filter)
  ASSERT_OK(db_->CompactRange(nullptr, nullptr));
5961
  ASSERT_EQ(0U, CountLiveFiles());
I
Igor Canadi 已提交
5962

L
Lei Jin 已提交
5963
  Reopen(options);
I
Igor Canadi 已提交
5964 5965 5966 5967 5968 5969 5970 5971 5972

  Iterator* itr = db_->NewIterator(ReadOptions());
  itr->SeekToFirst();
  // empty db
  ASSERT_TRUE(!itr->Valid());

  delete itr;
}

I
Igor Sugak 已提交
5973
TEST_F(DBTest, CompactionFilterWithValueChange) {
5974
  do {
5975
    Options options;
5976 5977
    options.num_levels = 3;
    options.max_mem_compaction_level = 0;
5978
    options.compaction_filter_factory =
5979
      std::make_shared<ChangeFilterFactory>();
5980
    options = CurrentOptions(options);
L
Lei Jin 已提交
5981
    CreateAndReopenWithCF({"pikachu"}, options);
5982

5983 5984 5985 5986 5987 5988 5989 5990 5991
    // Write 100K+1 keys, these are written to a few files
    // in L0. We do this so that the current snapshot points
    // to the 100001 key.The compaction filter is  not invoked
    // on keys that are visible via a snapshot because we
    // anyways cannot delete it.
    const std::string value(10, 'x');
    for (int i = 0; i < 100001; i++) {
      char key[100];
      snprintf(key, sizeof(key), "B%010d", i);
5992
      Put(1, key, value);
5993
    }
5994

5995
    // push all files to  lower levels
5996
    ASSERT_OK(Flush(1));
5997 5998 5999 6000 6001 6002
    if (option_config_ != kUniversalCompactionMultiLevel) {
      dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
      dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
    } else {
      dbfull()->CompactRange(handles_[1], nullptr, nullptr);
    }
6003

6004 6005 6006 6007
    // re-write all data again
    for (int i = 0; i < 100001; i++) {
      char key[100];
      snprintf(key, sizeof(key), "B%010d", i);
6008
      Put(1, key, value);
6009
    }
6010

6011 6012
    // push all files to  lower levels. This should
    // invoke the compaction filter for all 100000 keys.
6013
    ASSERT_OK(Flush(1));
6014 6015 6016 6017 6018 6019
    if (option_config_ != kUniversalCompactionMultiLevel) {
      dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
      dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
    } else {
      dbfull()->CompactRange(handles_[1], nullptr, nullptr);
    }
6020

6021 6022
    // verify that all keys now have the new value that
    // was set by the compaction process.
D
Danny Guo 已提交
6023
    for (int i = 0; i < 100001; i++) {
6024 6025
      char key[100];
      snprintf(key, sizeof(key), "B%010d", i);
6026
      std::string newvalue = Get(1, key);
6027 6028 6029
      ASSERT_EQ(newvalue.compare(NEW_VALUE), 0);
    }
  } while (ChangeCompactOptions());
6030 6031
}

I
Igor Sugak 已提交
6032
TEST_F(DBTest, CompactionFilterWithMergeOperator) {
6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100
  std::string one, two, three, four;
  PutFixed64(&one, 1);
  PutFixed64(&two, 2);
  PutFixed64(&three, 3);
  PutFixed64(&four, 4);

  Options options;
  options = CurrentOptions(options);
  options.create_if_missing = true;
  options.merge_operator = MergeOperators::CreateUInt64AddOperator();
  options.num_levels = 3;
  options.max_mem_compaction_level = 0;
  // Filter out keys with value is 2.
  options.compaction_filter_factory =
      std::make_shared<ConditionalFilterFactory>(two);
  DestroyAndReopen(options);

  // In the same compaction, a value type needs to be deleted based on
  // compaction filter, and there is a merge type for the key. compaction
  // filter result is ignored.
  ASSERT_OK(db_->Put(WriteOptions(), "foo", two));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->Merge(WriteOptions(), "foo", one));
  ASSERT_OK(Flush());
  std::string newvalue = Get("foo");
  ASSERT_EQ(newvalue, three);
  dbfull()->CompactRange(nullptr, nullptr);
  newvalue = Get("foo");
  ASSERT_EQ(newvalue, three);

  // value key can be deleted based on compaction filter, leaving only
  // merge keys.
  ASSERT_OK(db_->Put(WriteOptions(), "bar", two));
  ASSERT_OK(Flush());
  dbfull()->CompactRange(nullptr, nullptr);
  newvalue = Get("bar");
  ASSERT_EQ("NOT_FOUND", newvalue);
  ASSERT_OK(db_->Merge(WriteOptions(), "bar", two));
  ASSERT_OK(Flush());
  dbfull()->CompactRange(nullptr, nullptr);
  newvalue = Get("bar");
  ASSERT_EQ(two, two);

  // Compaction filter never applies to merge keys.
  ASSERT_OK(db_->Put(WriteOptions(), "foobar", one));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->Merge(WriteOptions(), "foobar", two));
  ASSERT_OK(Flush());
  newvalue = Get("foobar");
  ASSERT_EQ(newvalue, three);
  dbfull()->CompactRange(nullptr, nullptr);
  newvalue = Get("foobar");
  ASSERT_EQ(newvalue, three);

  // In the same compaction, both of value type and merge type keys need to be
  // deleted based on compaction filter, and there is a merge type for the key.
  // For both keys, compaction filter results are ignored.
  ASSERT_OK(db_->Put(WriteOptions(), "barfoo", two));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->Merge(WriteOptions(), "barfoo", two));
  ASSERT_OK(Flush());
  newvalue = Get("barfoo");
  ASSERT_EQ(newvalue, four);
  dbfull()->CompactRange(nullptr, nullptr);
  newvalue = Get("barfoo");
  ASSERT_EQ(newvalue, four);
}

I
Igor Sugak 已提交
6101
TEST_F(DBTest, CompactionFilterContextManual) {
6102 6103 6104 6105 6106 6107 6108
  KeepFilterFactory* filter = new KeepFilterFactory();

  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleUniversal;
  options.compaction_filter_factory.reset(filter);
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = 8;
L
Lei Jin 已提交
6109
  Reopen(options);
6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131
  int num_keys_per_file = 400;
  for (int j = 0; j < 3; j++) {
    // Write several keys.
    const std::string value(10, 'x');
    for (int i = 0; i < num_keys_per_file; i++) {
      char key[100];
      snprintf(key, sizeof(key), "B%08d%02d", i, j);
      Put(key, value);
    }
    dbfull()->TEST_FlushMemTable();
    // Make sure next file is much smaller so automatic compaction will not
    // be triggered.
    num_keys_per_file /= 2;
  }

  // Force a manual compaction
  cfilter_count = 0;
  filter->expect_manual_compaction_.store(true);
  filter->expect_full_compaction_.store(false);  // Manual compaction always
                                                 // set this flag.
  dbfull()->CompactRange(nullptr, nullptr);
  ASSERT_EQ(cfilter_count, 700);
S
sdong 已提交
6132
  ASSERT_EQ(NumSortedRuns(0), 1);
6133 6134

  // Verify total number of keys is correct after manual compaction.
6135
  {
6136 6137
    int count = 0;
    int total = 0;
6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150
    Arena arena;
    ScopedArenaIterator iter(dbfull()->TEST_NewInternalIterator(&arena));
    iter->SeekToFirst();
    ASSERT_OK(iter->status());
    while (iter->Valid()) {
      ParsedInternalKey ikey(Slice(), 0, kTypeValue);
      ikey.sequence = -1;
      ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
      total++;
      if (ikey.sequence != 0) {
        count++;
      }
      iter->Next();
6151
    }
6152 6153
    ASSERT_EQ(total, 700);
    ASSERT_EQ(count, 1);
6154 6155 6156
  }
}

D
Danny Guo 已提交
6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276
class KeepFilterV2 : public CompactionFilterV2 {
 public:
  virtual std::vector<bool> Filter(int level,
                                   const SliceVector& keys,
                                   const SliceVector& existing_values,
                                   std::vector<std::string>* new_values,
                                   std::vector<bool>* values_changed)
    const override {
    cfilter_count++;
    std::vector<bool> ret;
    new_values->clear();
    values_changed->clear();
    for (unsigned int i = 0; i < keys.size(); ++i) {
      values_changed->push_back(false);
      ret.push_back(false);
    }
    return ret;
  }

  virtual const char* Name() const override {
    return "KeepFilterV2";
  }
};

class DeleteFilterV2 : public CompactionFilterV2 {
 public:
  virtual std::vector<bool> Filter(int level,
                                   const SliceVector& keys,
                                   const SliceVector& existing_values,
                                   std::vector<std::string>* new_values,
                                   std::vector<bool>* values_changed)
    const override {
    cfilter_count++;
    new_values->clear();
    values_changed->clear();
    std::vector<bool> ret;
    for (unsigned int i = 0; i < keys.size(); ++i) {
      values_changed->push_back(false);
      ret.push_back(true);
    }
    return ret;
  }

  virtual const char* Name() const override {
    return "DeleteFilterV2";
  }
};

class ChangeFilterV2 : public CompactionFilterV2 {
 public:
  virtual std::vector<bool> Filter(int level,
                                   const SliceVector& keys,
                                   const SliceVector& existing_values,
                                   std::vector<std::string>* new_values,
                                   std::vector<bool>* values_changed)
    const override {
    std::vector<bool> ret;
    new_values->clear();
    values_changed->clear();
    for (unsigned int i = 0; i < keys.size(); ++i) {
      values_changed->push_back(true);
      new_values->push_back(NEW_VALUE);
      ret.push_back(false);
    }
    return ret;
  }

  virtual const char* Name() const override {
    return "ChangeFilterV2";
  }
};

class KeepFilterFactoryV2 : public CompactionFilterFactoryV2 {
 public:
  explicit KeepFilterFactoryV2(const SliceTransform* prefix_extractor)
    : CompactionFilterFactoryV2(prefix_extractor) { }

  virtual std::unique_ptr<CompactionFilterV2>
  CreateCompactionFilterV2(
      const CompactionFilterContext& context) override {
    return std::unique_ptr<CompactionFilterV2>(new KeepFilterV2());
  }

  virtual const char* Name() const override {
    return "KeepFilterFactoryV2";
  }
};

class DeleteFilterFactoryV2 : public CompactionFilterFactoryV2 {
 public:
  explicit DeleteFilterFactoryV2(const SliceTransform* prefix_extractor)
    : CompactionFilterFactoryV2(prefix_extractor) { }

  virtual std::unique_ptr<CompactionFilterV2>
  CreateCompactionFilterV2(
      const CompactionFilterContext& context) override {
    return std::unique_ptr<CompactionFilterV2>(new DeleteFilterV2());
  }

  virtual const char* Name() const override {
    return "DeleteFilterFactoryV2";
  }
};

class ChangeFilterFactoryV2 : public CompactionFilterFactoryV2 {
 public:
  explicit ChangeFilterFactoryV2(const SliceTransform* prefix_extractor)
    : CompactionFilterFactoryV2(prefix_extractor) { }

  virtual std::unique_ptr<CompactionFilterV2>
  CreateCompactionFilterV2(
      const CompactionFilterContext& context) override {
    return std::unique_ptr<CompactionFilterV2>(new ChangeFilterV2());
  }

  virtual const char* Name() const override {
    return "ChangeFilterFactoryV2";
  }
};

I
Igor Sugak 已提交
6277
TEST_F(DBTest, CompactionFilterV2) {
D
Danny Guo 已提交
6278 6279 6280 6281
  Options options = CurrentOptions();
  options.num_levels = 3;
  options.max_mem_compaction_level = 0;
  // extract prefix
I
Igor Canadi 已提交
6282 6283 6284
  std::unique_ptr<const SliceTransform> prefix_extractor;
  prefix_extractor.reset(NewFixedPrefixTransform(8));

D
Danny Guo 已提交
6285
  options.compaction_filter_factory_v2
I
Igor Canadi 已提交
6286
    = std::make_shared<KeepFilterFactoryV2>(prefix_extractor.get());
D
Danny Guo 已提交
6287 6288 6289 6290
  // In a testing environment, we can only flush the application
  // compaction filter buffer using universal compaction
  option_config_ = kUniversalCompaction;
  options.compaction_style = (rocksdb::CompactionStyle)1;
L
Lei Jin 已提交
6291
  Reopen(options);
D
Danny Guo 已提交
6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305

  // Write 100K keys, these are written to a few files in L0.
  const std::string value(10, 'x');
  for (int i = 0; i < 100000; i++) {
    char key[100];
    snprintf(key, sizeof(key), "B%08d%010d", i , i);
    Put(key, value);
  }

  dbfull()->TEST_FlushMemTable();

  dbfull()->TEST_CompactRange(0, nullptr, nullptr);
  dbfull()->TEST_CompactRange(1, nullptr, nullptr);

S
sdong 已提交
6306
  ASSERT_EQ(NumSortedRuns(0), 1);
D
Danny Guo 已提交
6307 6308 6309 6310

  // All the files are in the lowest level.
  int count = 0;
  int total = 0;
6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324
  {
    Arena arena;
    ScopedArenaIterator iter(dbfull()->TEST_NewInternalIterator(&arena));
    iter->SeekToFirst();
    ASSERT_OK(iter->status());
    while (iter->Valid()) {
      ParsedInternalKey ikey(Slice(), 0, kTypeValue);
      ikey.sequence = -1;
      ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
      total++;
      if (ikey.sequence != 0) {
        count++;
      }
      iter->Next();
D
Danny Guo 已提交
6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335
    }
  }

  ASSERT_EQ(total, 100000);
  // 1 snapshot only. Since we are using universal compacton,
  // the sequence no is cleared for better compression
  ASSERT_EQ(count, 1);

  // create a new database with the compaction
  // filter in such a way that it deletes all keys
  options.compaction_filter_factory_v2 =
I
Igor Canadi 已提交
6336
    std::make_shared<DeleteFilterFactoryV2>(prefix_extractor.get());
D
Danny Guo 已提交
6337
  options.create_if_missing = true;
L
Lei Jin 已提交
6338
  DestroyAndReopen(options);
D
Danny Guo 已提交
6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354

  // write all the keys once again.
  for (int i = 0; i < 100000; i++) {
    char key[100];
    snprintf(key, sizeof(key), "B%08d%010d", i, i);
    Put(key, value);
  }

  dbfull()->TEST_FlushMemTable();
  ASSERT_NE(NumTableFilesAtLevel(0), 0);

  dbfull()->TEST_CompactRange(0, nullptr, nullptr);
  dbfull()->TEST_CompactRange(1, nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);

  // Scan the entire database to ensure that nothing is left
6355
  Iterator* iter = db_->NewIterator(ReadOptions());
D
Danny Guo 已提交
6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366
  iter->SeekToFirst();
  count = 0;
  while (iter->Valid()) {
    count++;
    iter->Next();
  }

  ASSERT_EQ(count, 0);
  delete iter;
}

I
Igor Sugak 已提交
6367
TEST_F(DBTest, CompactionFilterV2WithValueChange) {
D
Danny Guo 已提交
6368 6369 6370
  Options options = CurrentOptions();
  options.num_levels = 3;
  options.max_mem_compaction_level = 0;
I
Igor Canadi 已提交
6371 6372
  std::unique_ptr<const SliceTransform> prefix_extractor;
  prefix_extractor.reset(NewFixedPrefixTransform(8));
D
Danny Guo 已提交
6373
  options.compaction_filter_factory_v2 =
I
Igor Canadi 已提交
6374
    std::make_shared<ChangeFilterFactoryV2>(prefix_extractor.get());
D
Danny Guo 已提交
6375 6376 6377 6378
  // In a testing environment, we can only flush the application
  // compaction filter buffer using universal compaction
  option_config_ = kUniversalCompaction;
  options.compaction_style = (rocksdb::CompactionStyle)1;
6379
  options = CurrentOptions(options);
L
Lei Jin 已提交
6380
  Reopen(options);
D
Danny Guo 已提交
6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408

  // Write 100K+1 keys, these are written to a few files
  // in L0. We do this so that the current snapshot points
  // to the 100001 key.The compaction filter is  not invoked
  // on keys that are visible via a snapshot because we
  // anyways cannot delete it.
  const std::string value(10, 'x');
  for (int i = 0; i < 100001; i++) {
    char key[100];
    snprintf(key, sizeof(key), "B%08d%010d", i, i);
    Put(key, value);
  }

  // push all files to lower levels
  dbfull()->TEST_FlushMemTable();
  dbfull()->TEST_CompactRange(0, nullptr, nullptr);
  dbfull()->TEST_CompactRange(1, nullptr, nullptr);

  // verify that all keys now have the new value that
  // was set by the compaction process.
  for (int i = 0; i < 100001; i++) {
    char key[100];
    snprintf(key, sizeof(key), "B%08d%010d", i, i);
    std::string newvalue = Get(key);
    ASSERT_EQ(newvalue.compare(NEW_VALUE), 0);
  }
}

I
Igor Sugak 已提交
6409
TEST_F(DBTest, CompactionFilterV2NULLPrefix) {
6410 6411 6412
  Options options = CurrentOptions();
  options.num_levels = 3;
  options.max_mem_compaction_level = 0;
I
Igor Canadi 已提交
6413 6414
  std::unique_ptr<const SliceTransform> prefix_extractor;
  prefix_extractor.reset(NewFixedPrefixTransform(8));
6415
  options.compaction_filter_factory_v2 =
I
Igor Canadi 已提交
6416
    std::make_shared<ChangeFilterFactoryV2>(prefix_extractor.get());
6417 6418 6419 6420
  // In a testing environment, we can only flush the application
  // compaction filter buffer using universal compaction
  option_config_ = kUniversalCompaction;
  options.compaction_style = (rocksdb::CompactionStyle)1;
L
Lei Jin 已提交
6421
  Reopen(options);
6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454

  // Write 100K+1 keys, these are written to a few files
  // in L0. We do this so that the current snapshot points
  // to the 100001 key.The compaction filter is  not invoked
  // on keys that are visible via a snapshot because we
  // anyways cannot delete it.
  const std::string value(10, 'x');
  char first_key[100];
  snprintf(first_key, sizeof(first_key), "%s0000%010d", "NULL", 1);
  Put(first_key, value);
  for (int i = 1; i < 100000; i++) {
    char key[100];
    snprintf(key, sizeof(key), "%08d%010d", i, i);
    Put(key, value);
  }

  char last_key[100];
  snprintf(last_key, sizeof(last_key), "%s0000%010d", "NULL", 2);
  Put(last_key, value);

  // push all files to lower levels
  dbfull()->TEST_FlushMemTable();
  dbfull()->TEST_CompactRange(0, nullptr, nullptr);

  // verify that all keys now have the new value that
  // was set by the compaction process.
  std::string newvalue = Get(first_key);
  ASSERT_EQ(newvalue.compare(NEW_VALUE), 0);
  newvalue = Get(last_key);
  ASSERT_EQ(newvalue.compare(NEW_VALUE), 0);
  for (int i = 1; i < 100000; i++) {
    char key[100];
    snprintf(key, sizeof(key), "%08d%010d", i, i);
I
Igor Canadi 已提交
6455
    newvalue = Get(key);
6456 6457 6458 6459
    ASSERT_EQ(newvalue.compare(NEW_VALUE), 0);
  }
}

I
Igor Sugak 已提交
6460
TEST_F(DBTest, SparseMerge) {
6461 6462 6463
  do {
    Options options = CurrentOptions();
    options.compression = kNoCompression;
L
Lei Jin 已提交
6464
    CreateAndReopenWithCF({"pikachu"}, options);
6465

6466
    FillLevels("A", "Z", 1);
6467 6468 6469 6470 6471 6472 6473 6474

    // Suppose there is:
    //    small amount of data with prefix A
    //    large amount of data with prefix B
    //    small amount of data with prefix C
    // and that recent updates have made small changes to all three prefixes.
    // Check that we do not do a compaction that merges all of B in one shot.
    const std::string value(1000, 'x');
6475
    Put(1, "A", "va");
6476 6477 6478 6479
    // Write approximately 100MB of "B" values
    for (int i = 0; i < 100000; i++) {
      char key[100];
      snprintf(key, sizeof(key), "B%010d", i);
6480
      Put(1, key, value);
6481
    }
6482 6483 6484
    Put(1, "C", "vc");
    ASSERT_OK(Flush(1));
    dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
6485

6486
    // Make sparse update
6487 6488 6489 6490
    Put(1, "A", "va2");
    Put(1, "B100", "bvalue2");
    Put(1, "C", "vc2");
    ASSERT_OK(Flush(1));
6491 6492 6493

    // Compactions should not cause us to create a situation where
    // a file overlaps too much data at the next level.
6494 6495
    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
              20 * 1048576);
6496
    dbfull()->TEST_CompactRange(0, nullptr, nullptr);
6497 6498
    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
              20 * 1048576);
6499
    dbfull()->TEST_CompactRange(1, nullptr, nullptr);
6500 6501
    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
              20 * 1048576);
6502
  } while (ChangeCompactOptions());
6503 6504
}

J
jorlow@chromium.org 已提交
6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  bool result = (val >= low) && (val <= high);
  if (!result) {
    fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
            (unsigned long long)(val),
            (unsigned long long)(low),
            (unsigned long long)(high));
  }
  return result;
}

I
Igor Sugak 已提交
6516
TEST_F(DBTest, ApproximateSizes) {
S
Sanjay Ghemawat 已提交
6517
  do {
6518
    Options options;
S
Sanjay Ghemawat 已提交
6519 6520
    options.write_buffer_size = 100000000;        // Large write buffer
    options.compression = kNoCompression;
L
Lei Jin 已提交
6521
    options.create_if_missing = true;
6522
    options = CurrentOptions(options);
L
Lei Jin 已提交
6523
    DestroyAndReopen(options);
L
Lei Jin 已提交
6524
    CreateAndReopenWithCF({"pikachu"}, options);
J
jorlow@chromium.org 已提交
6525

6526
    ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));
L
Lei Jin 已提交
6527
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
6528
    ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));
J
jorlow@chromium.org 已提交
6529

S
Sanjay Ghemawat 已提交
6530
    // Write 8MB (80 values, each 100K)
6531
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
S
Sanjay Ghemawat 已提交
6532 6533 6534 6535 6536
    const int N = 80;
    static const int S1 = 100000;
    static const int S2 = 105000;  // Allow some expansion from metadata
    Random rnd(301);
    for (int i = 0; i < N; i++) {
6537
      ASSERT_OK(Put(1, Key(i), RandomString(&rnd, S1)));
S
Sanjay Ghemawat 已提交
6538
    }
J
jorlow@chromium.org 已提交
6539

S
Sanjay Ghemawat 已提交
6540
    // 0 because GetApproximateSizes() does not account for memtable space
6541
    ASSERT_TRUE(Between(Size("", Key(50), 1), 0, 0));
D
 
dgrogan@chromium.org 已提交
6542

S
Sanjay Ghemawat 已提交
6543 6544
    // Check sizes across recovery by reopening a few times
    for (int run = 0; run < 3; run++) {
L
Lei Jin 已提交
6545
      ReopenWithColumnFamilies({"default", "pikachu"}, options);
D
dgrogan@chromium.org 已提交
6546

S
Sanjay Ghemawat 已提交
6547 6548
      for (int compact_start = 0; compact_start < N; compact_start += 10) {
        for (int i = 0; i < N; i += 10) {
6549 6550 6551 6552
          ASSERT_TRUE(Between(Size("", Key(i), 1), S1 * i, S2 * i));
          ASSERT_TRUE(Between(Size("", Key(i) + ".suffix", 1), S1 * (i + 1),
                              S2 * (i + 1)));
          ASSERT_TRUE(Between(Size(Key(i), Key(i + 10), 1), S1 * 10, S2 * 10));
S
Sanjay Ghemawat 已提交
6553
        }
6554 6555 6556
        ASSERT_TRUE(Between(Size("", Key(50), 1), S1 * 50, S2 * 50));
        ASSERT_TRUE(
            Between(Size("", Key(50) + ".suffix", 1), S1 * 50, S2 * 50));
S
Sanjay Ghemawat 已提交
6557 6558 6559 6560 6561

        std::string cstart_str = Key(compact_start);
        std::string cend_str = Key(compact_start + 9);
        Slice cstart = cstart_str;
        Slice cend = cend_str;
6562
        dbfull()->TEST_CompactRange(0, &cstart, &cend, handles_[1]);
J
jorlow@chromium.org 已提交
6563
      }
D
dgrogan@chromium.org 已提交
6564

6565 6566
      ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
      ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);
S
Sanjay Ghemawat 已提交
6567
    }
6568
    // ApproximateOffsetOf() is not yet implemented in plain table format.
I
Igor Canadi 已提交
6569
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
S
Stanislau Hlebik 已提交
6570
                         kSkipPlainTable | kSkipHashIndex));
J
jorlow@chromium.org 已提交
6571 6572
}

I
Igor Sugak 已提交
6573
TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
S
Sanjay Ghemawat 已提交
6574 6575 6576
  do {
    Options options = CurrentOptions();
    options.compression = kNoCompression;
L
Lei Jin 已提交
6577
    CreateAndReopenWithCF({"pikachu"}, options);
J
jorlow@chromium.org 已提交
6578

S
Sanjay Ghemawat 已提交
6579 6580
    Random rnd(301);
    std::string big1 = RandomString(&rnd, 100000);
6581 6582 6583 6584 6585 6586 6587 6588
    ASSERT_OK(Put(1, Key(0), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(1, Key(1), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(1, Key(2), big1));
    ASSERT_OK(Put(1, Key(3), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(1, Key(4), big1));
    ASSERT_OK(Put(1, Key(5), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(1, Key(6), RandomString(&rnd, 300000)));
    ASSERT_OK(Put(1, Key(7), RandomString(&rnd, 10000)));
S
Sanjay Ghemawat 已提交
6589 6590 6591

    // Check sizes across recovery by reopening a few times
    for (int run = 0; run < 3; run++) {
L
Lei Jin 已提交
6592
      ReopenWithColumnFamilies({"default", "pikachu"}, options);
S
Sanjay Ghemawat 已提交
6593

6594 6595 6596 6597 6598 6599 6600 6601 6602
      ASSERT_TRUE(Between(Size("", Key(0), 1), 0, 0));
      ASSERT_TRUE(Between(Size("", Key(1), 1), 10000, 11000));
      ASSERT_TRUE(Between(Size("", Key(2), 1), 20000, 21000));
      ASSERT_TRUE(Between(Size("", Key(3), 1), 120000, 121000));
      ASSERT_TRUE(Between(Size("", Key(4), 1), 130000, 131000));
      ASSERT_TRUE(Between(Size("", Key(5), 1), 230000, 231000));
      ASSERT_TRUE(Between(Size("", Key(6), 1), 240000, 241000));
      ASSERT_TRUE(Between(Size("", Key(7), 1), 540000, 541000));
      ASSERT_TRUE(Between(Size("", Key(8), 1), 550000, 560000));
S
Sanjay Ghemawat 已提交
6603

6604
      ASSERT_TRUE(Between(Size(Key(3), Key(5), 1), 110000, 111000));
S
Sanjay Ghemawat 已提交
6605

6606
      dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
S
Sanjay Ghemawat 已提交
6607
    }
6608 6609
    // ApproximateOffsetOf() is not yet implemented in plain table format.
  } while (ChangeOptions(kSkipPlainTable));
J
jorlow@chromium.org 已提交
6610 6611
}

I
Igor Sugak 已提交
6612
TEST_F(DBTest, IteratorPinsRef) {
6613
  do {
L
Lei Jin 已提交
6614
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
6615
    Put(1, "foo", "hello");
J
jorlow@chromium.org 已提交
6616

6617
    // Get iterator that will yield the current contents of the DB.
6618
    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
J
jorlow@chromium.org 已提交
6619

6620
    // Write to force compactions
6621
    Put(1, "foo", "newvalue1");
6622
    for (int i = 0; i < 100; i++) {
6623 6624
      // 100K values
      ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v')));
6625
    }
6626
    Put(1, "foo", "newvalue2");
J
jorlow@chromium.org 已提交
6627

6628 6629 6630 6631 6632 6633 6634 6635
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("foo", iter->key().ToString());
    ASSERT_EQ("hello", iter->value().ToString());
    iter->Next();
    ASSERT_TRUE(!iter->Valid());
    delete iter;
  } while (ChangeCompactOptions());
J
jorlow@chromium.org 已提交
6636 6637
}

I
Igor Sugak 已提交
6638
TEST_F(DBTest, Snapshot) {
6639 6640
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
S
Sanjay Ghemawat 已提交
6641
  do {
6642
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
6643 6644
    Put(0, "foo", "0v1");
    Put(1, "foo", "1v1");
6645

S
Sanjay Ghemawat 已提交
6646
    const Snapshot* s1 = db_->GetSnapshot();
6647 6648 6649
    ASSERT_EQ(1U, GetNumSnapshots());
    uint64_t time_snap1 = GetTimeOldestSnapshots();
    ASSERT_GT(time_snap1, 0U);
6650 6651
    Put(0, "foo", "0v2");
    Put(1, "foo", "1v2");
6652 6653 6654

    env_->addon_time_++;

S
Sanjay Ghemawat 已提交
6655
    const Snapshot* s2 = db_->GetSnapshot();
6656 6657
    ASSERT_EQ(2U, GetNumSnapshots());
    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
6658 6659
    Put(0, "foo", "0v3");
    Put(1, "foo", "1v3");
6660

S
Sanjay Ghemawat 已提交
6661
    const Snapshot* s3 = db_->GetSnapshot();
6662 6663
    ASSERT_EQ(3U, GetNumSnapshots());
    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
S
Sanjay Ghemawat 已提交
6664

6665 6666 6667 6668 6669 6670 6671 6672 6673 6674
    Put(0, "foo", "0v4");
    Put(1, "foo", "1v4");
    ASSERT_EQ("0v1", Get(0, "foo", s1));
    ASSERT_EQ("1v1", Get(1, "foo", s1));
    ASSERT_EQ("0v2", Get(0, "foo", s2));
    ASSERT_EQ("1v2", Get(1, "foo", s2));
    ASSERT_EQ("0v3", Get(0, "foo", s3));
    ASSERT_EQ("1v3", Get(1, "foo", s3));
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));
S
Sanjay Ghemawat 已提交
6675 6676

    db_->ReleaseSnapshot(s3);
6677 6678
    ASSERT_EQ(2U, GetNumSnapshots());
    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
6679 6680 6681 6682 6683 6684
    ASSERT_EQ("0v1", Get(0, "foo", s1));
    ASSERT_EQ("1v1", Get(1, "foo", s1));
    ASSERT_EQ("0v2", Get(0, "foo", s2));
    ASSERT_EQ("1v2", Get(1, "foo", s2));
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));
J
jorlow@chromium.org 已提交
6685

S
Sanjay Ghemawat 已提交
6686
    db_->ReleaseSnapshot(s1);
6687 6688 6689 6690
    ASSERT_EQ("0v2", Get(0, "foo", s2));
    ASSERT_EQ("1v2", Get(1, "foo", s2));
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));
6691 6692
    ASSERT_EQ(1U, GetNumSnapshots());
    ASSERT_LT(time_snap1, GetTimeOldestSnapshots());
6693

S
Sanjay Ghemawat 已提交
6694
    db_->ReleaseSnapshot(s2);
6695
    ASSERT_EQ(0U, GetNumSnapshots());
6696 6697
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));
6698
  } while (ChangeOptions(kSkipHashCuckoo));
S
Sanjay Ghemawat 已提交
6699
}
J
jorlow@chromium.org 已提交
6700

I
Igor Sugak 已提交
6701
TEST_F(DBTest, HiddenValuesAreRemoved) {
6702 6703
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
S
Sanjay Ghemawat 已提交
6704
  do {
6705
    Options options = CurrentOptions(options_override);
I
Igor Canadi 已提交
6706
    options.max_background_flushes = 0;
L
Lei Jin 已提交
6707
    CreateAndReopenWithCF({"pikachu"}, options);
S
Sanjay Ghemawat 已提交
6708
    Random rnd(301);
6709
    FillLevels("a", "z", 1);
S
Sanjay Ghemawat 已提交
6710 6711

    std::string big = RandomString(&rnd, 50000);
6712 6713
    Put(1, "foo", big);
    Put(1, "pastfoo", "v");
S
Sanjay Ghemawat 已提交
6714
    const Snapshot* snapshot = db_->GetSnapshot();
6715 6716
    Put(1, "foo", "tiny");
    Put(1, "pastfoo2", "v2");  // Advance sequence number one more
S
Sanjay Ghemawat 已提交
6717

6718 6719
    ASSERT_OK(Flush(1));
    ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);
S
Sanjay Ghemawat 已提交
6720

6721 6722
    ASSERT_EQ(big, Get(1, "foo", snapshot));
    ASSERT_TRUE(Between(Size("", "pastfoo", 1), 50000, 60000));
S
Sanjay Ghemawat 已提交
6723
    db_->ReleaseSnapshot(snapshot);
6724
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny, " + big + " ]");
S
Sanjay Ghemawat 已提交
6725
    Slice x("x");
6726 6727 6728 6729 6730 6731 6732 6733
    dbfull()->TEST_CompactRange(0, nullptr, &x, handles_[1]);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
    ASSERT_GE(NumTableFilesAtLevel(1, 1), 1);
    dbfull()->TEST_CompactRange(1, nullptr, &x, handles_[1]);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");

    ASSERT_TRUE(Between(Size("", "pastfoo", 1), 0, 1000));
6734 6735
    // ApproximateOffsetOf() is not yet implemented in plain table format,
    // which is used by Size().
6736
    // skip HashCuckooRep as it does not support snapshot
I
Igor Canadi 已提交
6737 6738
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
                         kSkipPlainTable | kSkipHashCuckoo));
J
jorlow@chromium.org 已提交
6739 6740
}

I
Igor Sugak 已提交
6741
TEST_F(DBTest, CompactBetweenSnapshots) {
6742 6743
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
6744
  do {
6745
    Options options = CurrentOptions(options_override);
6746
    options.disable_auto_compactions = true;
L
Lei Jin 已提交
6747
    CreateAndReopenWithCF({"pikachu"}, options);
6748
    Random rnd(301);
6749
    FillLevels("a", "z", 1);
6750

6751
    Put(1, "foo", "first");
6752
    const Snapshot* snapshot1 = db_->GetSnapshot();
6753 6754 6755
    Put(1, "foo", "second");
    Put(1, "foo", "third");
    Put(1, "foo", "fourth");
6756
    const Snapshot* snapshot2 = db_->GetSnapshot();
6757 6758
    Put(1, "foo", "fifth");
    Put(1, "foo", "sixth");
6759 6760 6761

    // All entries (including duplicates) exist
    // before any compaction is triggered.
6762 6763 6764 6765 6766
    ASSERT_OK(Flush(1));
    ASSERT_EQ("sixth", Get(1, "foo"));
    ASSERT_EQ("fourth", Get(1, "foo", snapshot2));
    ASSERT_EQ("first", Get(1, "foo", snapshot1));
    ASSERT_EQ(AllEntriesFor("foo", 1),
6767 6768 6769 6770
              "[ sixth, fifth, fourth, third, second, first ]");

    // After a compaction, "second", "third" and "fifth" should
    // be removed
6771 6772 6773 6774 6775 6776
    FillLevels("a", "z", 1);
    dbfull()->CompactRange(handles_[1], nullptr, nullptr);
    ASSERT_EQ("sixth", Get(1, "foo"));
    ASSERT_EQ("fourth", Get(1, "foo", snapshot2));
    ASSERT_EQ("first", Get(1, "foo", snapshot1));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth, first ]");
6777 6778 6779

    // after we release the snapshot1, only two values left
    db_->ReleaseSnapshot(snapshot1);
6780 6781
    FillLevels("a", "z", 1);
    dbfull()->CompactRange(handles_[1], nullptr, nullptr);
6782 6783 6784

    // We have only one valid snapshot snapshot2. Since snapshot1 is
    // not valid anymore, "first" should be removed by a compaction.
6785 6786 6787
    ASSERT_EQ("sixth", Get(1, "foo"));
    ASSERT_EQ("fourth", Get(1, "foo", snapshot2));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth ]");
6788 6789 6790

    // after we release the snapshot2, only one value should be left
    db_->ReleaseSnapshot(snapshot2);
6791 6792 6793 6794
    FillLevels("a", "z", 1);
    dbfull()->CompactRange(handles_[1], nullptr, nullptr);
    ASSERT_EQ("sixth", Get(1, "foo"));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth ]");
6795
    // skip HashCuckooRep as it does not support snapshot
I
Igor Canadi 已提交
6796
  } while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction));
6797 6798
}

I
Igor Sugak 已提交
6799
TEST_F(DBTest, DeletionMarkers1) {
I
Igor Canadi 已提交
6800 6801
  Options options = CurrentOptions();
  options.max_background_flushes = 0;
L
Lei Jin 已提交
6802
  CreateAndReopenWithCF({"pikachu"}, options);
6803 6804 6805 6806 6807
  Put(1, "foo", "v1");
  ASSERT_OK(Flush(1));
  const int last = CurrentOptions().max_mem_compaction_level;
  // foo => v1 is now in last level
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
6808 6809

  // Place a table at level last-1 to prevent merging with preceding mutation
6810 6811 6812 6813 6814 6815 6816 6817 6818 6819
  Put(1, "a", "begin");
  Put(1, "z", "end");
  Flush(1);
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
  ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);

  Delete(1, "foo");
  Put(1, "foo", "v2");
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
  ASSERT_OK(Flush(1));  // Moves to level last-2
6820
  if (CurrentOptions().purge_redundant_kvs_while_flush) {
6821
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
6822
  } else {
6823
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
6824
  }
G
Gabor Cselle 已提交
6825
  Slice z("z");
6826
  dbfull()->TEST_CompactRange(last - 2, nullptr, &z, handles_[1]);
J
jorlow@chromium.org 已提交
6827 6828
  // DEL eliminated, but v1 remains because we aren't compacting that level
  // (DEL can be eliminated because v2 hides v1).
6829 6830
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
  dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]);
6831 6832
  // Merging last-1 w/ last, so we are the base level for "foo", so
  // DEL is removed.  (as is v1).
6833
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
J
jorlow@chromium.org 已提交
6834 6835
}

I
Igor Sugak 已提交
6836
TEST_F(DBTest, DeletionMarkers2) {
I
Igor Canadi 已提交
6837 6838
  Options options = CurrentOptions();
  options.max_background_flushes = 0;
L
Lei Jin 已提交
6839
  CreateAndReopenWithCF({"pikachu"}, options);
6840 6841 6842 6843 6844
  Put(1, "foo", "v1");
  ASSERT_OK(Flush(1));
  const int last = CurrentOptions().max_mem_compaction_level;
  // foo => v1 is now in last level
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
6845 6846

  // Place a table at level last-1 to prevent merging with preceding mutation
6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857
  Put(1, "a", "begin");
  Put(1, "z", "end");
  Flush(1);
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
  ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);

  Delete(1, "foo");
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
  ASSERT_OK(Flush(1));  // Moves to level last-2
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
  dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr, handles_[1]);
6858
  // DEL kept: "last" file overlaps
6859 6860
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
  dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]);
6861 6862
  // Merging last-1 w/ last, so we are the base level for "foo", so
  // DEL is removed.  (as is v1).
6863
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
J
jorlow@chromium.org 已提交
6864 6865
}

I
Igor Sugak 已提交
6866
TEST_F(DBTest, OverlapInLevel0) {
S
Sanjay Ghemawat 已提交
6867
  do {
I
Igor Canadi 已提交
6868 6869
    Options options = CurrentOptions();
    options.max_background_flushes = 0;
L
Lei Jin 已提交
6870
    CreateAndReopenWithCF({"pikachu"}, options);
6871
    int tmp = CurrentOptions().max_mem_compaction_level;
6872
    ASSERT_EQ(tmp, 2) << "Fix test to match config";
G
Gabor Cselle 已提交
6873

N
Naman Gupta 已提交
6874
    //Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0.
6875 6876 6877 6878 6879 6880 6881
    ASSERT_OK(Put(1, "100", "v100"));
    ASSERT_OK(Put(1, "999", "v999"));
    Flush(1);
    ASSERT_OK(Delete(1, "100"));
    ASSERT_OK(Delete(1, "999"));
    Flush(1);
    ASSERT_EQ("0,1,1", FilesPerLevel(1));
S
Sanjay Ghemawat 已提交
6882 6883 6884 6885 6886

    // Make files spanning the following ranges in level-0:
    //  files[0]  200 .. 900
    //  files[1]  300 .. 500
    // Note that files are sorted by smallest key.
6887 6888 6889 6890 6891 6892 6893 6894
    ASSERT_OK(Put(1, "300", "v300"));
    ASSERT_OK(Put(1, "500", "v500"));
    Flush(1);
    ASSERT_OK(Put(1, "200", "v200"));
    ASSERT_OK(Put(1, "600", "v600"));
    ASSERT_OK(Put(1, "900", "v900"));
    Flush(1);
    ASSERT_EQ("2,1,1", FilesPerLevel(1));
G
Gabor Cselle 已提交
6895

S
Sanjay Ghemawat 已提交
6896
    // Compact away the placeholder files we created initially
6897 6898 6899
    dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
    dbfull()->TEST_CompactRange(2, nullptr, nullptr, handles_[1]);
    ASSERT_EQ("2", FilesPerLevel(1));
G
Gabor Cselle 已提交
6900

S
Sanjay Ghemawat 已提交
6901 6902 6903
    // Do a memtable compaction.  Before bug-fix, the compaction would
    // not detect the overlap with level-0 files and would incorrectly place
    // the deletion in a deeper level.
6904 6905 6906 6907
    ASSERT_OK(Delete(1, "600"));
    Flush(1);
    ASSERT_EQ("3", FilesPerLevel(1));
    ASSERT_EQ("NOT_FOUND", Get(1, "600"));
I
Igor Canadi 已提交
6908
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
G
Gabor Cselle 已提交
6909 6910
}

I
Igor Sugak 已提交
6911
TEST_F(DBTest, L0_CompactionBug_Issue44_a) {
6912
  do {
L
Lei Jin 已提交
6913
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
6914
    ASSERT_OK(Put(1, "b", "v"));
L
Lei Jin 已提交
6915
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
6916 6917
    ASSERT_OK(Delete(1, "b"));
    ASSERT_OK(Delete(1, "a"));
L
Lei Jin 已提交
6918
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
6919
    ASSERT_OK(Delete(1, "a"));
L
Lei Jin 已提交
6920
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
6921
    ASSERT_OK(Put(1, "a", "v"));
L
Lei Jin 已提交
6922 6923
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
6924
    ASSERT_EQ("(a->v)", Contents(1));
6925
    env_->SleepForMicroseconds(1000000);  // Wait for compaction to finish
6926
    ASSERT_EQ("(a->v)", Contents(1));
6927
  } while (ChangeCompactOptions());
H
Hans Wennborg 已提交
6928 6929
}

I
Igor Sugak 已提交
6930
TEST_F(DBTest, L0_CompactionBug_Issue44_b) {
6931
  do {
L
Lei Jin 已提交
6932
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
6933
    Put(1, "", "");
L
Lei Jin 已提交
6934
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
6935 6936
    Delete(1, "e");
    Put(1, "", "");
L
Lei Jin 已提交
6937
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
6938
    Put(1, "c", "cv");
L
Lei Jin 已提交
6939
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
6940
    Put(1, "", "");
L
Lei Jin 已提交
6941
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
6942
    Put(1, "", "");
6943
    env_->SleepForMicroseconds(1000000);  // Wait for compaction to finish
L
Lei Jin 已提交
6944
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
6945
    Put(1, "d", "dv");
L
Lei Jin 已提交
6946
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
6947
    Put(1, "", "");
L
Lei Jin 已提交
6948
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
6949 6950
    Delete(1, "d");
    Delete(1, "b");
L
Lei Jin 已提交
6951
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
6952
    ASSERT_EQ("(->)(c->cv)", Contents(1));
6953
    env_->SleepForMicroseconds(1000000);  // Wait for compaction to finish
6954
    ASSERT_EQ("(->)(c->cv)", Contents(1));
6955
  } while (ChangeCompactOptions());
H
Hans Wennborg 已提交
6956 6957
}

I
Igor Sugak 已提交
6958
TEST_F(DBTest, ComparatorCheck) {
J
jorlow@chromium.org 已提交
6959 6960
  class NewComparator : public Comparator {
   public:
I
Igor Sugak 已提交
6961 6962 6963 6964
    virtual const char* Name() const override {
      return "rocksdb.NewComparator";
    }
    virtual int Compare(const Slice& a, const Slice& b) const override {
J
jorlow@chromium.org 已提交
6965 6966
      return BytewiseComparator()->Compare(a, b);
    }
I
Igor Sugak 已提交
6967 6968
    virtual void FindShortestSeparator(std::string* s,
                                       const Slice& l) const override {
J
jorlow@chromium.org 已提交
6969 6970
      BytewiseComparator()->FindShortestSeparator(s, l);
    }
I
Igor Sugak 已提交
6971
    virtual void FindShortSuccessor(std::string* key) const override {
J
jorlow@chromium.org 已提交
6972 6973 6974
      BytewiseComparator()->FindShortSuccessor(key);
    }
  };
6975
  Options new_options, options;
6976
  NewComparator cmp;
6977
  do {
6978
    options = CurrentOptions();
L
Lei Jin 已提交
6979
    CreateAndReopenWithCF({"pikachu"}, options);
6980
    new_options = CurrentOptions();
6981
    new_options.comparator = &cmp;
6982 6983
    // only the non-default column family has non-matching comparator
    Status s = TryReopenWithColumnFamilies({"default", "pikachu"},
L
Lei Jin 已提交
6984
        std::vector<Options>({options, new_options}));
6985 6986 6987
    ASSERT_TRUE(!s.ok());
    ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
        << s.ToString();
6988
  } while (ChangeCompactOptions());
J
jorlow@chromium.org 已提交
6989 6990
}

I
Igor Sugak 已提交
6991
TEST_F(DBTest, CustomComparator) {
H
Hans Wennborg 已提交
6992 6993
  class NumberComparator : public Comparator {
   public:
I
Igor Sugak 已提交
6994 6995 6996 6997
    virtual const char* Name() const override {
      return "test.NumberComparator";
    }
    virtual int Compare(const Slice& a, const Slice& b) const override {
6998 6999
      return ToNumber(a) - ToNumber(b);
    }
I
Igor Sugak 已提交
7000 7001
    virtual void FindShortestSeparator(std::string* s,
                                       const Slice& l) const override {
7002 7003 7004
      ToNumber(*s);     // Check format
      ToNumber(l);      // Check format
    }
I
Igor Sugak 已提交
7005
    virtual void FindShortSuccessor(std::string* key) const override {
7006 7007 7008 7009 7010
      ToNumber(*key);   // Check format
    }
   private:
    static int ToNumber(const Slice& x) {
      // Check that there are no extra characters.
7011
      EXPECT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']')
7012 7013 7014
          << EscapeString(x);
      int val;
      char ignored;
7015
      EXPECT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
7016 7017
          << EscapeString(x);
      return val;
H
Hans Wennborg 已提交
7018 7019
    }
  };
7020 7021
  Options new_options;
  NumberComparator cmp;
7022
  do {
7023
    new_options = CurrentOptions();
7024 7025 7026
    new_options.create_if_missing = true;
    new_options.comparator = &cmp;
    new_options.write_buffer_size = 1000;  // Compact more often
7027
    new_options = CurrentOptions(new_options);
L
Lei Jin 已提交
7028
    DestroyAndReopen(new_options);
L
Lei Jin 已提交
7029
    CreateAndReopenWithCF({"pikachu"}, new_options);
7030 7031
    ASSERT_OK(Put(1, "[10]", "ten"));
    ASSERT_OK(Put(1, "[0x14]", "twenty"));
7032
    for (int i = 0; i < 2; i++) {
7033 7034 7035 7036 7037 7038 7039
      ASSERT_EQ("ten", Get(1, "[10]"));
      ASSERT_EQ("ten", Get(1, "[0xa]"));
      ASSERT_EQ("twenty", Get(1, "[20]"));
      ASSERT_EQ("twenty", Get(1, "[0x14]"));
      ASSERT_EQ("NOT_FOUND", Get(1, "[15]"));
      ASSERT_EQ("NOT_FOUND", Get(1, "[0xf]"));
      Compact(1, "[0]", "[9999]");
7040 7041 7042 7043 7044 7045
    }

    for (int run = 0; run < 2; run++) {
      for (int i = 0; i < 1000; i++) {
        char buf[100];
        snprintf(buf, sizeof(buf), "[%d]", i*10);
7046
        ASSERT_OK(Put(1, buf, buf));
7047
      }
7048
      Compact(1, "[0]", "[1000000]");
7049
    }
7050
  } while (ChangeCompactOptions());
H
Hans Wennborg 已提交
7051 7052
}

I
Igor Sugak 已提交
7053
TEST_F(DBTest, ManualCompaction) {
I
Igor Canadi 已提交
7054 7055
  Options options = CurrentOptions();
  options.max_background_flushes = 0;
L
Lei Jin 已提交
7056
  CreateAndReopenWithCF({"pikachu"}, options);
7057
  ASSERT_EQ(dbfull()->MaxMemCompactionLevel(), 2)
G
Gabor Cselle 已提交
7058 7059
      << "Need to update this test to match kMaxMemCompactLevel";

7060 7061 7062
  // iter - 0 with 7 levels
  // iter - 1 with 3 levels
  for (int iter = 0; iter < 2; ++iter) {
7063 7064
    MakeTables(3, "p", "q", 1);
    ASSERT_EQ("1,1,1", FilesPerLevel(1));
7065 7066

    // Compaction range falls before files
7067 7068
    Compact(1, "", "c");
    ASSERT_EQ("1,1,1", FilesPerLevel(1));
G
Gabor Cselle 已提交
7069

7070
    // Compaction range falls after files
7071 7072
    Compact(1, "r", "z");
    ASSERT_EQ("1,1,1", FilesPerLevel(1));
G
Gabor Cselle 已提交
7073

7074
    // Compaction range overlaps files
7075 7076
    Compact(1, "p1", "p9");
    ASSERT_EQ("0,0,1", FilesPerLevel(1));
G
Gabor Cselle 已提交
7077

7078
    // Populate a different range
7079 7080
    MakeTables(3, "c", "e", 1);
    ASSERT_EQ("1,1,2", FilesPerLevel(1));
G
Gabor Cselle 已提交
7081

7082
    // Compact just the new range
7083 7084
    Compact(1, "b", "f");
    ASSERT_EQ("0,0,2", FilesPerLevel(1));
G
Gabor Cselle 已提交
7085

7086
    // Compact all
7087 7088 7089 7090
    MakeTables(1, "a", "z", 1);
    ASSERT_EQ("0,1,2", FilesPerLevel(1));
    db_->CompactRange(handles_[1], nullptr, nullptr);
    ASSERT_EQ("0,0,1", FilesPerLevel(1));
7091 7092

    if (iter == 0) {
I
Igor Canadi 已提交
7093
      options = CurrentOptions();
I
Igor Canadi 已提交
7094
      options.max_background_flushes = 0;
7095 7096
      options.num_levels = 3;
      options.create_if_missing = true;
L
Lei Jin 已提交
7097
      DestroyAndReopen(options);
L
Lei Jin 已提交
7098
      CreateAndReopenWithCF({"pikachu"}, options);
7099 7100
    }
  }
G
Gabor Cselle 已提交
7101 7102 7103

}

S
sdong 已提交
7104 7105 7106 7107 7108
class DBTestUniversalManualCompactionOutputPathId
    : public DBTestUniversalCompactionBase {};

TEST_P(DBTestUniversalManualCompactionOutputPathId,
       ManualCompactionOutputPathId) {
7109 7110 7111 7112 7113
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.db_paths.emplace_back(dbname_, 1000000000);
  options.db_paths.emplace_back(dbname_ + "_2", 1000000000);
  options.compaction_style = kCompactionStyleUniversal;
S
sdong 已提交
7114 7115
  options.num_levels = num_levels_;
  options.target_file_size_base = 1 << 30;  // Big size
7116
  options.level0_file_num_compaction_trigger = 10;
L
Lei Jin 已提交
7117 7118
  Destroy(options);
  DestroyAndReopen(options);
L
Lei Jin 已提交
7119
  CreateAndReopenWithCF({"pikachu"}, options);
7120 7121
  MakeTables(3, "p", "q", 1);
  dbfull()->TEST_WaitForCompact();
S
sdong 已提交
7122
  ASSERT_EQ(3, TotalLiveFiles(1));
7123 7124 7125 7126 7127
  ASSERT_EQ(3, GetSstFileCount(options.db_paths[0].path));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[1].path));

  // Full compaction to DB path 0
  db_->CompactRange(handles_[1], nullptr, nullptr, false, -1, 1);
S
sdong 已提交
7128
  ASSERT_EQ(1, TotalLiveFiles(1));
7129 7130 7131
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));

L
Lei Jin 已提交
7132
  ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options);
S
sdong 已提交
7133
  ASSERT_EQ(1, TotalLiveFiles(1));
7134 7135 7136 7137
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));

  MakeTables(1, "p", "q", 1);
S
sdong 已提交
7138
  ASSERT_EQ(2, TotalLiveFiles(1));
7139 7140 7141
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));

L
Lei Jin 已提交
7142
  ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options);
S
sdong 已提交
7143
  ASSERT_EQ(2, TotalLiveFiles(1));
7144 7145 7146 7147 7148
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));

  // Full compaction to DB path 0
  db_->CompactRange(handles_[1], nullptr, nullptr, false, -1, 0);
S
sdong 已提交
7149
  ASSERT_EQ(1, TotalLiveFiles(1));
7150 7151 7152 7153 7154 7155 7156 7157
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[1].path));

  // Fail when compacting to an invalid path ID
  ASSERT_TRUE(db_->CompactRange(handles_[1], nullptr, nullptr, false, -1, 2)
                  .IsInvalidArgument());
}

S
sdong 已提交
7158 7159 7160 7161
INSTANTIATE_TEST_CASE_P(DBTestUniversalManualCompactionOutputPathId,
                        DBTestUniversalManualCompactionOutputPathId,
                        ::testing::Values(1, 8));

I
Igor Sugak 已提交
7162
TEST_F(DBTest, ManualLevelCompactionOutputPathId) {
7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230
  Options options = CurrentOptions();
  options.db_paths.emplace_back(dbname_ + "_2", 2 * 10485760);
  options.db_paths.emplace_back(dbname_ + "_3", 100 * 10485760);
  options.db_paths.emplace_back(dbname_ + "_4", 120 * 10485760);
  options.max_background_flushes = 1;
  CreateAndReopenWithCF({"pikachu"}, options);
  ASSERT_EQ(dbfull()->MaxMemCompactionLevel(), 2)
      << "Need to update this test to match kMaxMemCompactLevel";

  // iter - 0 with 7 levels
  // iter - 1 with 3 levels
  for (int iter = 0; iter < 2; ++iter) {
    MakeTables(3, "p", "q", 1);
    ASSERT_EQ("3", FilesPerLevel(1));
    ASSERT_EQ(3, GetSstFileCount(options.db_paths[0].path));
    ASSERT_EQ(0, GetSstFileCount(dbname_));

    // Compaction range falls before files
    Compact(1, "", "c");
    ASSERT_EQ("3", FilesPerLevel(1));

    // Compaction range falls after files
    Compact(1, "r", "z");
    ASSERT_EQ("3", FilesPerLevel(1));

    // Compaction range overlaps files
    Compact(1, "p1", "p9", 1);
    ASSERT_EQ("0,1", FilesPerLevel(1));
    ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
    ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path));
    ASSERT_EQ(0, GetSstFileCount(dbname_));

    // Populate a different range
    MakeTables(3, "c", "e", 1);
    ASSERT_EQ("3,1", FilesPerLevel(1));

    // Compact just the new range
    Compact(1, "b", "f", 1);
    ASSERT_EQ("0,2", FilesPerLevel(1));
    ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path));
    ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path));
    ASSERT_EQ(0, GetSstFileCount(dbname_));

    // Compact all
    MakeTables(1, "a", "z", 1);
    ASSERT_EQ("1,2", FilesPerLevel(1));
    ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path));
    ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path));
    db_->CompactRange(handles_[1], nullptr, nullptr, false, 1, 1);
    ASSERT_EQ("0,1", FilesPerLevel(1));
    ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
    ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path));
    ASSERT_EQ(0, GetSstFileCount(dbname_));

    if (iter == 0) {
      DestroyAndReopen(options);
      options = CurrentOptions();
      options.db_paths.emplace_back(dbname_ + "_2", 2 * 10485760);
      options.db_paths.emplace_back(dbname_ + "_3", 100 * 10485760);
      options.db_paths.emplace_back(dbname_ + "_4", 120 * 10485760);
      options.max_background_flushes = 1;
      options.num_levels = 3;
      options.create_if_missing = true;
      CreateAndReopenWithCF({"pikachu"}, options);
    }
  }
}

I
Igor Sugak 已提交
7231
TEST_F(DBTest, DBOpen_Options) {
7232 7233 7234
  Options options = CurrentOptions();
  std::string dbname = test::TmpDir(env_) + "/db_options_test";
  ASSERT_OK(DestroyDB(dbname, options));
J
jorlow@chromium.org 已提交
7235 7236

  // Does not exist, and create_if_missing == false: error
A
Abhishek Kona 已提交
7237
  DB* db = nullptr;
7238 7239
  options.create_if_missing = false;
  Status s = DB::Open(options, dbname, &db);
A
Abhishek Kona 已提交
7240 7241
  ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr);
  ASSERT_TRUE(db == nullptr);
J
jorlow@chromium.org 已提交
7242 7243

  // Does not exist, and create_if_missing == true: OK
7244 7245
  options.create_if_missing = true;
  s = DB::Open(options, dbname, &db);
J
jorlow@chromium.org 已提交
7246
  ASSERT_OK(s);
A
Abhishek Kona 已提交
7247
  ASSERT_TRUE(db != nullptr);
J
jorlow@chromium.org 已提交
7248 7249

  delete db;
A
Abhishek Kona 已提交
7250
  db = nullptr;
J
jorlow@chromium.org 已提交
7251 7252

  // Does exist, and error_if_exists == true: error
7253 7254 7255
  options.create_if_missing = false;
  options.error_if_exists = true;
  s = DB::Open(options, dbname, &db);
A
Abhishek Kona 已提交
7256 7257
  ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr);
  ASSERT_TRUE(db == nullptr);
J
jorlow@chromium.org 已提交
7258 7259

  // Does exist, and error_if_exists == false: OK
7260 7261 7262
  options.create_if_missing = true;
  options.error_if_exists = false;
  s = DB::Open(options, dbname, &db);
J
jorlow@chromium.org 已提交
7263
  ASSERT_OK(s);
A
Abhishek Kona 已提交
7264
  ASSERT_TRUE(db != nullptr);
J
jorlow@chromium.org 已提交
7265 7266

  delete db;
A
Abhishek Kona 已提交
7267
  db = nullptr;
J
jorlow@chromium.org 已提交
7268 7269
}

I
Igor Sugak 已提交
7270
TEST_F(DBTest, DBOpen_Change_NumLevels) {
7271 7272 7273 7274
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.max_background_flushes = 0;
  DestroyAndReopen(options);
7275
  ASSERT_TRUE(db_ != nullptr);
7276
  CreateAndReopenWithCF({"pikachu"}, options);
7277 7278 7279 7280 7281

  ASSERT_OK(Put(1, "a", "123"));
  ASSERT_OK(Put(1, "b", "234"));
  db_->CompactRange(handles_[1], nullptr, nullptr);
  Close();
7282

7283 7284 7285
  options.create_if_missing = false;
  options.num_levels = 2;
  Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
7286
  ASSERT_TRUE(strstr(s.ToString().c_str(), "Invalid argument") != nullptr);
7287
  ASSERT_TRUE(db_ == nullptr);
7288 7289
}

I
Igor Sugak 已提交
7290
TEST_F(DBTest, DestroyDBMetaDatabase) {
7291
  std::string dbname = test::TmpDir(env_) + "/db_meta";
7292
  ASSERT_OK(env_->CreateDirIfMissing(dbname));
K
Kosie van der Merwe 已提交
7293
  std::string metadbname = MetaDatabaseName(dbname, 0);
7294
  ASSERT_OK(env_->CreateDirIfMissing(metadbname));
K
Kosie van der Merwe 已提交
7295
  std::string metametadbname = MetaDatabaseName(metadbname, 0);
7296
  ASSERT_OK(env_->CreateDirIfMissing(metametadbname));
K
Kosie van der Merwe 已提交
7297 7298

  // Destroy previous versions if they exist. Using the long way.
7299 7300 7301 7302
  Options options = CurrentOptions();
  ASSERT_OK(DestroyDB(metametadbname, options));
  ASSERT_OK(DestroyDB(metadbname, options));
  ASSERT_OK(DestroyDB(dbname, options));
K
Kosie van der Merwe 已提交
7303 7304

  // Setup databases
A
Abhishek Kona 已提交
7305
  DB* db = nullptr;
7306
  ASSERT_OK(DB::Open(options, dbname, &db));
K
Kosie van der Merwe 已提交
7307
  delete db;
A
Abhishek Kona 已提交
7308
  db = nullptr;
7309
  ASSERT_OK(DB::Open(options, metadbname, &db));
K
Kosie van der Merwe 已提交
7310
  delete db;
A
Abhishek Kona 已提交
7311
  db = nullptr;
7312
  ASSERT_OK(DB::Open(options, metametadbname, &db));
K
Kosie van der Merwe 已提交
7313
  delete db;
A
Abhishek Kona 已提交
7314
  db = nullptr;
K
Kosie van der Merwe 已提交
7315 7316

  // Delete databases
7317
  ASSERT_OK(DestroyDB(dbname, options));
K
Kosie van der Merwe 已提交
7318 7319

  // Check if deletion worked.
7320 7321 7322 7323
  options.create_if_missing = false;
  ASSERT_TRUE(!(DB::Open(options, dbname, &db)).ok());
  ASSERT_TRUE(!(DB::Open(options, metadbname, &db)).ok());
  ASSERT_TRUE(!(DB::Open(options, metametadbname, &db)).ok());
K
Kosie van der Merwe 已提交
7324 7325
}

7326
// Check that number of files does not grow when writes are dropped
I
Igor Sugak 已提交
7327
TEST_F(DBTest, DropWrites) {
7328 7329 7330
  do {
    Options options = CurrentOptions();
    options.env = env_;
7331
    options.paranoid_checks = false;
L
Lei Jin 已提交
7332
    Reopen(options);
7333

7334 7335 7336
    ASSERT_OK(Put("foo", "v1"));
    ASSERT_EQ("v1", Get("foo"));
    Compact("a", "z");
7337
    const size_t num_files = CountFiles();
I
Igor Canadi 已提交
7338 7339
    // Force out-of-space errors
    env_->drop_writes_.store(true, std::memory_order_release);
7340 7341
    env_->sleep_counter_.Reset();
    for (int i = 0; i < 5; i++) {
7342 7343 7344 7345 7346
      if (option_config_ != kUniversalCompactionMultiLevel) {
        for (int level = 0; level < dbfull()->NumberLevels(); level++) {
          if (level > 0 && level == dbfull()->NumberLevels() - 1) {
            break;
          }
7347 7348
          dbfull()->TEST_CompactRange(level, nullptr, nullptr, nullptr,
                                      true /* disallow trivial move */);
S
sdong 已提交
7349
        }
7350 7351
      } else {
        dbfull()->CompactRange(nullptr, nullptr);
7352
      }
7353
    }
7354 7355 7356 7357 7358

    std::string property_value;
    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
    ASSERT_EQ("5", property_value);

I
Igor Canadi 已提交
7359
    env_->drop_writes_.store(false, std::memory_order_release);
7360
    ASSERT_LT(CountFiles(), num_files + 3);
7361

7362 7363 7364
    // Check that compaction attempts slept after errors
    ASSERT_GE(env_->sleep_counter_.Read(), 5);
  } while (ChangeCompactOptions());
7365 7366
}

7367
// Check background error counter bumped on flush failures.
I
Igor Sugak 已提交
7368
TEST_F(DBTest, DropWritesFlush) {
7369 7370 7371 7372
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.max_background_flushes = 1;
L
Lei Jin 已提交
7373
    Reopen(options);
7374 7375

    ASSERT_OK(Put("foo", "v1"));
I
Igor Canadi 已提交
7376 7377
    // Force out-of-space errors
    env_->drop_writes_.store(true, std::memory_order_release);
7378 7379 7380 7381 7382 7383

    std::string property_value;
    // Background error count is 0 now.
    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
    ASSERT_EQ("0", property_value);

L
Lei Jin 已提交
7384 7385 7386
    dbfull()->TEST_FlushMemTable(true);

    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
7387 7388
    ASSERT_EQ("1", property_value);

I
Igor Canadi 已提交
7389
    env_->drop_writes_.store(false, std::memory_order_release);
7390 7391 7392 7393 7394
  } while (ChangeCompactOptions());
}

// Check that CompactRange() returns failure if there is not enough space left
// on device
I
Igor Sugak 已提交
7395
TEST_F(DBTest, NoSpaceCompactRange) {
7396 7397 7398 7399
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.disable_auto_compactions = true;
L
Lei Jin 已提交
7400
    Reopen(options);
7401 7402 7403 7404 7405 7406 7407

    // generate 5 tables
    for (int i = 0; i < 5; ++i) {
      ASSERT_OK(Put(Key(i), Key(i) + "v"));
      ASSERT_OK(Flush());
    }

I
Igor Canadi 已提交
7408 7409
    // Force out-of-space errors
    env_->no_space_.store(true, std::memory_order_release);
7410

7411 7412
    Status s = dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                           true /* disallow trivial move */);
7413 7414
    ASSERT_TRUE(s.IsIOError());

I
Igor Canadi 已提交
7415
    env_->no_space_.store(false, std::memory_order_release);
7416 7417 7418
  } while (ChangeCompactOptions());
}

I
Igor Sugak 已提交
7419
TEST_F(DBTest, NonWritableFileSystem) {
7420 7421 7422 7423
  do {
    Options options = CurrentOptions();
    options.write_buffer_size = 1000;
    options.env = env_;
L
Lei Jin 已提交
7424
    Reopen(options);
7425
    ASSERT_OK(Put("foo", "v1"));
7426
    env_->non_writeable_rate_.store(100);
7427 7428 7429 7430 7431 7432 7433
    std::string big(100000, 'x');
    int errors = 0;
    for (int i = 0; i < 20; i++) {
      if (!Put("foo", big).ok()) {
        errors++;
        env_->SleepForMicroseconds(100000);
      }
7434
    }
7435
    ASSERT_GT(errors, 0);
7436
    env_->non_writeable_rate_.store(0);
7437
  } while (ChangeCompactOptions());
7438 7439
}

I
Igor Sugak 已提交
7440
TEST_F(DBTest, ManifestWriteError) {
7441 7442 7443 7444 7445 7446 7447 7448 7449
  // Test for the following problem:
  // (a) Compaction produces file F
  // (b) Log record containing F is written to MANIFEST file, but Sync() fails
  // (c) GC deletes F
  // (d) After reopening DB, reads fail since deleted F is named in log record

  // We iterate twice.  In the second iteration, everything is the
  // same except the log record never makes it to the MANIFEST file.
  for (int iter = 0; iter < 2; iter++) {
I
Igor Canadi 已提交
7450
    std::atomic<bool>* error_type = (iter == 0)
7451 7452 7453 7454 7455 7456 7457 7458
        ? &env_->manifest_sync_error_
        : &env_->manifest_write_error_;

    // Insert foo=>bar mapping
    Options options = CurrentOptions();
    options.env = env_;
    options.create_if_missing = true;
    options.error_if_exists = false;
I
Igor Canadi 已提交
7459
    options.max_background_flushes = 0;
L
Lei Jin 已提交
7460
    DestroyAndReopen(options);
7461 7462 7463 7464
    ASSERT_OK(Put("foo", "bar"));
    ASSERT_EQ("bar", Get("foo"));

    // Memtable compaction (will succeed)
7465
    Flush();
7466 7467 7468 7469 7470
    ASSERT_EQ("bar", Get("foo"));
    const int last = dbfull()->MaxMemCompactionLevel();
    ASSERT_EQ(NumTableFilesAtLevel(last), 1);   // foo=>bar is now in last level

    // Merging compaction (will fail)
I
Igor Canadi 已提交
7471
    error_type->store(true, std::memory_order_release);
A
Abhishek Kona 已提交
7472
    dbfull()->TEST_CompactRange(last, nullptr, nullptr);  // Should fail
7473 7474 7475
    ASSERT_EQ("bar", Get("foo"));

    // Recovery: should not lose data
I
Igor Canadi 已提交
7476
    error_type->store(false, std::memory_order_release);
L
Lei Jin 已提交
7477
    Reopen(options);
7478 7479 7480 7481
    ASSERT_EQ("bar", Get("foo"));
  }
}

I
Igor Sugak 已提交
7482
TEST_F(DBTest, PutFailsParanoid) {
I
Igor Canadi 已提交
7483 7484 7485 7486 7487 7488 7489 7490 7491 7492
  // Test the following:
  // (a) A random put fails in paranoid mode (simulate by sync fail)
  // (b) All other puts have to fail, even if writes would succeed
  // (c) All of that should happen ONLY if paranoid_checks = true

  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.error_if_exists = false;
  options.paranoid_checks = true;
L
Lei Jin 已提交
7493
  DestroyAndReopen(options);
L
Lei Jin 已提交
7494
  CreateAndReopenWithCF({"pikachu"}, options);
I
Igor Canadi 已提交
7495 7496
  Status s;

7497 7498
  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo1", "bar1"));
I
Igor Canadi 已提交
7499
  // simulate error
I
Igor Canadi 已提交
7500
  env_->log_write_error_.store(true, std::memory_order_release);
7501
  s = Put(1, "foo2", "bar2");
I
Igor Canadi 已提交
7502
  ASSERT_TRUE(!s.ok());
I
Igor Canadi 已提交
7503
  env_->log_write_error_.store(false, std::memory_order_release);
7504
  s = Put(1, "foo3", "bar3");
I
Igor Canadi 已提交
7505 7506 7507
  // the next put should fail, too
  ASSERT_TRUE(!s.ok());
  // but we're still able to read
7508
  ASSERT_EQ("bar", Get(1, "foo"));
I
Igor Canadi 已提交
7509 7510 7511

  // do the same thing with paranoid checks off
  options.paranoid_checks = false;
L
Lei Jin 已提交
7512
  DestroyAndReopen(options);
L
Lei Jin 已提交
7513
  CreateAndReopenWithCF({"pikachu"}, options);
I
Igor Canadi 已提交
7514

7515 7516
  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo1", "bar1"));
I
Igor Canadi 已提交
7517
  // simulate error
I
Igor Canadi 已提交
7518
  env_->log_write_error_.store(true, std::memory_order_release);
7519
  s = Put(1, "foo2", "bar2");
I
Igor Canadi 已提交
7520
  ASSERT_TRUE(!s.ok());
I
Igor Canadi 已提交
7521
  env_->log_write_error_.store(false, std::memory_order_release);
7522
  s = Put(1, "foo3", "bar3");
I
Igor Canadi 已提交
7523 7524 7525 7526
  // the next put should NOT fail
  ASSERT_TRUE(s.ok());
}

I
Igor Sugak 已提交
7527
TEST_F(DBTest, FilesDeletedAfterCompaction) {
7528
  do {
L
Lei Jin 已提交
7529
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
7530 7531
    ASSERT_OK(Put(1, "foo", "v2"));
    Compact(1, "a", "z");
7532
    const size_t num_files = CountLiveFiles();
7533
    for (int i = 0; i < 10; i++) {
7534 7535
      ASSERT_OK(Put(1, "foo", "v2"));
      Compact(1, "a", "z");
7536 7537 7538
    }
    ASSERT_EQ(CountLiveFiles(), num_files);
  } while (ChangeCompactOptions());
7539 7540
}

I
Igor Sugak 已提交
7541
TEST_F(DBTest, BloomFilter) {
7542 7543
  do {
    Options options = CurrentOptions();
7544
    env_->count_random_reads_ = true;
7545
    options.env = env_;
7546 7547 7548 7549 7550 7551 7552
    // ChangeCompactOptions() only changes compaction style, which does not
    // trigger reset of table_factory
    BlockBasedTableOptions table_options;
    table_options.no_block_cache = true;
    table_options.filter_policy.reset(NewBloomFilterPolicy(10));
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));

L
Lei Jin 已提交
7553
    CreateAndReopenWithCF({"pikachu"}, options);
S
Sanjay Ghemawat 已提交
7554

7555 7556 7557
    // Populate multiple layers
    const int N = 10000;
    for (int i = 0; i < N; i++) {
7558
      ASSERT_OK(Put(1, Key(i), Key(i)));
7559
    }
7560
    Compact(1, "a", "z");
7561
    for (int i = 0; i < N; i += 100) {
7562
      ASSERT_OK(Put(1, Key(i), Key(i)));
7563
    }
7564
    Flush(1);
S
Sanjay Ghemawat 已提交
7565

7566
    // Prevent auto compactions triggered by seeks
I
Igor Canadi 已提交
7567
    env_->delay_sstable_sync_.store(true, std::memory_order_release);
S
Sanjay Ghemawat 已提交
7568

7569 7570 7571
    // Lookup present keys.  Should rarely read from small sstable.
    env_->random_read_counter_.Reset();
    for (int i = 0; i < N; i++) {
7572
      ASSERT_EQ(Key(i), Get(1, Key(i)));
7573 7574 7575 7576 7577
    }
    int reads = env_->random_read_counter_.Read();
    fprintf(stderr, "%d present => %d reads\n", N, reads);
    ASSERT_GE(reads, N);
    ASSERT_LE(reads, N + 2*N/100);
S
Sanjay Ghemawat 已提交
7578

7579 7580 7581
    // Lookup present keys.  Should rarely read from either sstable.
    env_->random_read_counter_.Reset();
    for (int i = 0; i < N; i++) {
7582
      ASSERT_EQ("NOT_FOUND", Get(1, Key(i) + ".missing"));
7583 7584 7585 7586
    }
    reads = env_->random_read_counter_.Read();
    fprintf(stderr, "%d missing => %d reads\n", N, reads);
    ASSERT_LE(reads, 3*N/100);
S
Sanjay Ghemawat 已提交
7587

I
Igor Canadi 已提交
7588
    env_->delay_sstable_sync_.store(false, std::memory_order_release);
7589 7590
    Close();
  } while (ChangeCompactOptions());
S
Sanjay Ghemawat 已提交
7591 7592
}

I
Igor Sugak 已提交
7593
TEST_F(DBTest, BloomFilterRate) {
7594 7595 7596
  while (ChangeFilterOptions()) {
    Options options = CurrentOptions();
    options.statistics = rocksdb::CreateDBStatistics();
L
Lei Jin 已提交
7597
    CreateAndReopenWithCF({"pikachu"}, options);
7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620

    const int maxKey = 10000;
    for (int i = 0; i < maxKey; i++) {
      ASSERT_OK(Put(1, Key(i), Key(i)));
    }
    // Add a large key to make the file contain wide range
    ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
    Flush(1);

    // Check if they can be found
    for (int i = 0; i < maxKey; i++) {
      ASSERT_EQ(Key(i), Get(1, Key(i)));
    }
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);

    // Check if filter is useful
    for (int i = 0; i < maxKey; i++) {
      ASSERT_EQ("NOT_FOUND", Get(1, Key(i+33333)));
    }
    ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey*0.98);
  }
}

I
Igor Sugak 已提交
7621
TEST_F(DBTest, BloomFilterCompatibility) {
7622
  Options options = CurrentOptions();
7623 7624 7625 7626 7627 7628
  options.statistics = rocksdb::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  // Create with block based filter
L
Lei Jin 已提交
7629
  CreateAndReopenWithCF({"pikachu"}, options);
7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640

  const int maxKey = 10000;
  for (int i = 0; i < maxKey; i++) {
    ASSERT_OK(Put(1, Key(i), Key(i)));
  }
  ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
  Flush(1);

  // Check db with full filter
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
L
Lei Jin 已提交
7641
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
7642 7643 7644 7645 7646 7647 7648 7649

  // Check if they can be found
  for (int i = 0; i < maxKey; i++) {
    ASSERT_EQ(Key(i), Get(1, Key(i)));
  }
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
}

I
Igor Sugak 已提交
7650
TEST_F(DBTest, BloomFilterReverseCompatibility) {
7651
  Options options = CurrentOptions();
7652 7653 7654 7655 7656 7657
  options.statistics = rocksdb::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  // Create with full filter
L
Lei Jin 已提交
7658
  CreateAndReopenWithCF({"pikachu"}, options);
7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669

  const int maxKey = 10000;
  for (int i = 0; i < maxKey; i++) {
    ASSERT_OK(Put(1, Key(i), Key(i)));
  }
  ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
  Flush(1);

  // Check db with block_based filter
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
L
Lei Jin 已提交
7670
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
7671 7672 7673 7674 7675 7676 7677 7678

  // Check if they can be found
  for (int i = 0; i < maxKey; i++) {
    ASSERT_EQ(Key(i), Get(1, Key(i)));
  }
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
}

F
Feng Zhu 已提交
7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711
namespace {
// A wrapped bloom over default FilterPolicy
class WrappedBloom : public FilterPolicy {
 public:
  explicit WrappedBloom(int bits_per_key) :
        filter_(NewBloomFilterPolicy(bits_per_key)),
        counter_(0) {}

  ~WrappedBloom() { delete filter_; }

  const char* Name() const override { return "WrappedRocksDbFilterPolicy"; }

  void CreateFilter(const rocksdb::Slice* keys, int n, std::string* dst)
      const override {
    std::unique_ptr<rocksdb::Slice[]> user_keys(new rocksdb::Slice[n]);
    for (int i = 0; i < n; ++i) {
      user_keys[i] = convertKey(keys[i]);
    }
    return filter_->CreateFilter(user_keys.get(), n, dst);
  }

  bool KeyMayMatch(const rocksdb::Slice& key, const rocksdb::Slice& filter)
      const override {
    counter_++;
    return filter_->KeyMayMatch(convertKey(key), filter);
  }

  uint32_t GetCounter() { return counter_; }

 private:
  const FilterPolicy* filter_;
  mutable uint32_t counter_;

7712
  rocksdb::Slice convertKey(const rocksdb::Slice& key) const {
F
Feng Zhu 已提交
7713 7714 7715 7716 7717
    return key;
  }
};
}  // namespace

I
Igor Sugak 已提交
7718
TEST_F(DBTest, BloomFilterWrapper) {
7719
  Options options = CurrentOptions();
F
Feng Zhu 已提交
7720 7721 7722 7723 7724 7725 7726
  options.statistics = rocksdb::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  WrappedBloom* policy = new WrappedBloom(10);
  table_options.filter_policy.reset(policy);
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

L
Lei Jin 已提交
7727
  CreateAndReopenWithCF({"pikachu"}, options);
F
Feng Zhu 已提交
7728 7729 7730 7731 7732 7733 7734

  const int maxKey = 10000;
  for (int i = 0; i < maxKey; i++) {
    ASSERT_OK(Put(1, Key(i), Key(i)));
  }
  // Add a large key to make the file contain wide range
  ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
C
Chilledheart 已提交
7735
  ASSERT_EQ(0U, policy->GetCounter());
F
Feng Zhu 已提交
7736 7737 7738 7739 7740 7741 7742
  Flush(1);

  // Check if they can be found
  for (int i = 0; i < maxKey; i++) {
    ASSERT_EQ(Key(i), Get(1, Key(i)));
  }
  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
C
Chilledheart 已提交
7743
  ASSERT_EQ(1U * maxKey, policy->GetCounter());
F
Feng Zhu 已提交
7744 7745 7746 7747 7748 7749

  // Check if filter is useful
  for (int i = 0; i < maxKey; i++) {
    ASSERT_EQ("NOT_FOUND", Get(1, Key(i+33333)));
  }
  ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey*0.98);
C
Chilledheart 已提交
7750
  ASSERT_EQ(2U * maxKey, policy->GetCounter());
F
Feng Zhu 已提交
7751 7752
}

I
Igor Sugak 已提交
7753
TEST_F(DBTest, SnapshotFiles) {
7754 7755 7756
  do {
    Options options = CurrentOptions();
    options.write_buffer_size = 100000000;        // Large write buffer
L
Lei Jin 已提交
7757
    CreateAndReopenWithCF({"pikachu"}, options);
7758

7759
    Random rnd(301);
7760

7761
    // Write 8MB (80 values, each 100K)
7762
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
7763 7764 7765
    std::vector<std::string> values;
    for (int i = 0; i < 80; i++) {
      values.push_back(RandomString(&rnd, 100000));
7766
      ASSERT_OK(Put((i < 40), Key(i), values[i]));
7767
    }
7768

7769
    // assert that nothing makes it to disk yet.
7770
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
7771

7772 7773 7774 7775 7776 7777 7778
    // get a file snapshot
    uint64_t manifest_number = 0;
    uint64_t manifest_size = 0;
    std::vector<std::string> files;
    dbfull()->DisableFileDeletions();
    dbfull()->GetLiveFiles(files, &manifest_size);

7779 7780
    // CURRENT, MANIFEST, *.sst files (one for each CF)
    ASSERT_EQ(files.size(), 4U);
7781 7782 7783 7784 7785 7786

    uint64_t number = 0;
    FileType type;

    // copy these files to a new snapshot directory
    std::string snapdir = dbname_ + ".snapdir/";
7787
    ASSERT_OK(env_->CreateDirIfMissing(snapdir));
7788 7789

    for (unsigned int i = 0; i < files.size(); i++) {
I
Igor Canadi 已提交
7790 7791 7792 7793 7794
      // our clients require that GetLiveFiles returns
      // files with "/" as first character!
      ASSERT_EQ(files[i][0], '/');
      std::string src = dbname_ + files[i];
      std::string dest = snapdir + files[i];
7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807

      uint64_t size;
      ASSERT_OK(env_->GetFileSize(src, &size));

      // record the number and the size of the
      // latest manifest file
      if (ParseFileName(files[i].substr(1), &number, &type)) {
        if (type == kDescriptorFile) {
          if (number > manifest_number) {
            manifest_number = number;
            ASSERT_GE(size, manifest_size);
            size = manifest_size; // copy only valid MANIFEST data
          }
7808 7809
        }
      }
I
Igor Canadi 已提交
7810
      CopyFile(src, dest, size);
7811 7812
    }

7813 7814 7815 7816 7817 7818
    // release file snapshot
    dbfull()->DisableFileDeletions();
    // overwrite one key, this key should not appear in the snapshot
    std::vector<std::string> extras;
    for (unsigned int i = 0; i < 1; i++) {
      extras.push_back(RandomString(&rnd, 100000));
7819
      ASSERT_OK(Put(0, Key(i), extras[i]));
7820
    }
7821

7822
    // verify that data in the snapshot are correct
7823 7824 7825 7826
    std::vector<ColumnFamilyDescriptor> column_families;
    column_families.emplace_back("default", ColumnFamilyOptions());
    column_families.emplace_back("pikachu", ColumnFamilyOptions());
    std::vector<ColumnFamilyHandle*> cf_handles;
7827
    DB* snapdb;
7828
    DBOptions opts;
7829
    opts.env = env_;
7830
    opts.create_if_missing = false;
7831 7832
    Status stat =
        DB::Open(opts, snapdir, column_families, &cf_handles, &snapdb);
N
Naman Gupta 已提交
7833
    ASSERT_OK(stat);
7834 7835 7836 7837

    ReadOptions roptions;
    std::string val;
    for (unsigned int i = 0; i < 80; i++) {
7838
      stat = snapdb->Get(roptions, cf_handles[i < 40], Key(i), &val);
7839 7840
      ASSERT_EQ(values[i].compare(val), 0);
    }
7841 7842 7843
    for (auto cfh : cf_handles) {
      delete cfh;
    }
7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869
    delete snapdb;

    // look at the new live files after we added an 'extra' key
    // and after we took the first snapshot.
    uint64_t new_manifest_number = 0;
    uint64_t new_manifest_size = 0;
    std::vector<std::string> newfiles;
    dbfull()->DisableFileDeletions();
    dbfull()->GetLiveFiles(newfiles, &new_manifest_size);

    // find the new manifest file. assert that this manifest file is
    // the same one as in the previous snapshot. But its size should be
    // larger because we added an extra key after taking the
    // previous shapshot.
    for (unsigned int i = 0; i < newfiles.size(); i++) {
      std::string src = dbname_ + "/" + newfiles[i];
      // record the lognumber and the size of the
      // latest manifest file
      if (ParseFileName(newfiles[i].substr(1), &number, &type)) {
        if (type == kDescriptorFile) {
          if (number > new_manifest_number) {
            uint64_t size;
            new_manifest_number = number;
            ASSERT_OK(env_->GetFileSize(src, &size));
            ASSERT_GE(size, new_manifest_size);
          }
7870 7871 7872
        }
      }
    }
7873 7874
    ASSERT_EQ(manifest_number, new_manifest_number);
    ASSERT_GT(new_manifest_size, manifest_size);
7875

7876 7877 7878
    // release file snapshot
    dbfull()->DisableFileDeletions();
  } while (ChangeCompactOptions());
7879 7880
}

I
Igor Sugak 已提交
7881
TEST_F(DBTest, CompactOnFlush) {
7882 7883
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
7884
  do {
7885
    Options options = CurrentOptions(options_override);
7886 7887
    options.purge_redundant_kvs_while_flush = true;
    options.disable_auto_compactions = true;
L
Lei Jin 已提交
7888
    CreateAndReopenWithCF({"pikachu"}, options);
7889

7890 7891 7892
    Put(1, "foo", "v1");
    ASSERT_OK(Flush(1));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v1 ]");
7893

7894
    // Write two new keys
7895 7896 7897
    Put(1, "a", "begin");
    Put(1, "z", "end");
    Flush(1);
7898

7899
    // Case1: Delete followed by a put
7900 7901 7902
    Delete(1, "foo");
    Put(1, "foo", "v2");
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
7903

7904 7905
    // After the current memtable is flushed, the DEL should
    // have been removed
7906 7907
    ASSERT_OK(Flush(1));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
7908

7909 7910
    dbfull()->CompactRange(handles_[1], nullptr, nullptr);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
7911

7912
    // Case 2: Delete followed by another delete
7913 7914 7915 7916 7917 7918 7919
    Delete(1, "foo");
    Delete(1, "foo");
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, DEL, v2 ]");
    ASSERT_OK(Flush(1));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v2 ]");
    dbfull()->CompactRange(handles_[1], nullptr, nullptr);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
7920

7921
    // Case 3: Put followed by a delete
7922 7923 7924 7925 7926 7927 7928
    Put(1, "foo", "v3");
    Delete(1, "foo");
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v3 ]");
    ASSERT_OK(Flush(1));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL ]");
    dbfull()->CompactRange(handles_[1], nullptr, nullptr);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
7929

7930
    // Case 4: Put followed by another Put
7931 7932 7933 7934 7935 7936 7937
    Put(1, "foo", "v4");
    Put(1, "foo", "v5");
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5, v4 ]");
    ASSERT_OK(Flush(1));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]");
    dbfull()->CompactRange(handles_[1], nullptr, nullptr);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]");
7938

7939
    // clear database
7940 7941 7942
    Delete(1, "foo");
    dbfull()->CompactRange(handles_[1], nullptr, nullptr);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
7943

7944 7945
    // Case 5: Put followed by snapshot followed by another Put
    // Both puts should remain.
7946
    Put(1, "foo", "v6");
7947
    const Snapshot* snapshot = db_->GetSnapshot();
7948 7949 7950
    Put(1, "foo", "v7");
    ASSERT_OK(Flush(1));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v7, v6 ]");
7951
    db_->ReleaseSnapshot(snapshot);
7952

7953
    // clear database
7954 7955 7956
    Delete(1, "foo");
    dbfull()->CompactRange(handles_[1], nullptr, nullptr);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
7957

7958 7959 7960
    // Case 5: snapshot followed by a put followed by another Put
    // Only the last put should remain.
    const Snapshot* snapshot1 = db_->GetSnapshot();
7961 7962 7963 7964
    Put(1, "foo", "v8");
    Put(1, "foo", "v9");
    ASSERT_OK(Flush(1));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v9 ]");
7965 7966
    db_->ReleaseSnapshot(snapshot1);
  } while (ChangeCompactOptions());
7967 7968
}

I
Igor Canadi 已提交
7969
namespace {
S
Stanislau Hlebik 已提交
7970 7971
std::vector<std::uint64_t> ListSpecificFiles(
    Env* env, const std::string& path, const FileType expected_file_type) {
7972
  std::vector<std::string> files;
S
Stanislau Hlebik 已提交
7973
  std::vector<uint64_t> file_numbers;
7974 7975 7976 7977 7978
  env->GetChildren(path, &files);
  uint64_t number;
  FileType type;
  for (size_t i = 0; i < files.size(); ++i) {
    if (ParseFileName(files[i], &number, &type)) {
S
Stanislau Hlebik 已提交
7979
      if (type == expected_file_type) {
S
Stanislau Hlebik 已提交
7980
        file_numbers.push_back(number);
7981 7982 7983
      }
    }
  }
S
Stanislau Hlebik 已提交
7984
  return std::move(file_numbers);
7985
}
S
Stanislau Hlebik 已提交
7986 7987 7988 7989

std::vector<std::uint64_t> ListTableFiles(Env* env, const std::string& path) {
  return ListSpecificFiles(env, path, kTableFile);
}
I
Igor Canadi 已提交
7990
}  // namespace
7991

I
Igor Sugak 已提交
7992
TEST_F(DBTest, FlushOneColumnFamily) {
7993
  Options options = CurrentOptions();
S
Stanislau Hlebik 已提交
7994 7995
  CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich",
                         "alyosha", "popovich"},
L
Lei Jin 已提交
7996
                        options);
S
Stanislau Hlebik 已提交
7997 7998 7999 8000 8001 8002 8003 8004 8005 8006

  ASSERT_OK(Put(0, "Default", "Default"));
  ASSERT_OK(Put(1, "pikachu", "pikachu"));
  ASSERT_OK(Put(2, "ilya", "ilya"));
  ASSERT_OK(Put(3, "muromec", "muromec"));
  ASSERT_OK(Put(4, "dobrynia", "dobrynia"));
  ASSERT_OK(Put(5, "nikitich", "nikitich"));
  ASSERT_OK(Put(6, "alyosha", "alyosha"));
  ASSERT_OK(Put(7, "popovich", "popovich"));

8007
  for (int i = 0; i < 8; ++i) {
S
Stanislau Hlebik 已提交
8008 8009
    Flush(i);
    auto tables = ListTableFiles(env_, dbname_);
8010
    ASSERT_EQ(tables.size(), i + 1U);
S
Stanislau Hlebik 已提交
8011 8012 8013
  }
}

S
Stanislau Hlebik 已提交
8014 8015 8016 8017 8018
// In https://reviews.facebook.net/D20661 we change
// recovery behavior: previously for each log file each column family
// memtable was flushed, even it was empty. Now it's changed:
// we try to create the smallest number of table files by merging
// updates from multiple logs
I
Igor Sugak 已提交
8019
TEST_F(DBTest, RecoverCheckFileAmountWithSmallWriteBuffer) {
8020
  Options options = CurrentOptions();
S
Stanislau Hlebik 已提交
8021
  options.write_buffer_size = 5000000;
L
Lei Jin 已提交
8022
  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
S
Stanislau Hlebik 已提交
8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037

  // Since we will reopen DB with smaller write_buffer_size,
  // each key will go to new SST file
  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));

  ASSERT_OK(Put(3, Key(10), DummyString(1)));
  // Make 'dobrynia' to be flushed and new WAL file to be created
  ASSERT_OK(Put(2, Key(10), DummyString(7500000)));
  ASSERT_OK(Put(2, Key(1), DummyString(1)));
  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
  {
    auto tables = ListTableFiles(env_, dbname_);
I
Igor Canadi 已提交
8038
    ASSERT_EQ(tables.size(), static_cast<size_t>(1));
S
Stanislau Hlebik 已提交
8039
    // Make sure 'dobrynia' was flushed: check sst files amount
I
Igor Canadi 已提交
8040 8041
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(1));
S
Stanislau Hlebik 已提交
8042 8043 8044 8045 8046 8047 8048 8049 8050 8051
  }
  // New WAL file
  ASSERT_OK(Put(1, Key(1), DummyString(1)));
  ASSERT_OK(Put(1, Key(1), DummyString(1)));
  ASSERT_OK(Put(3, Key(10), DummyString(1)));
  ASSERT_OK(Put(3, Key(10), DummyString(1)));
  ASSERT_OK(Put(3, Key(10), DummyString(1)));

  options.write_buffer_size = 10;
  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
L
Lei Jin 已提交
8052
                           options);
S
Stanislau Hlebik 已提交
8053 8054
  {
    // No inserts => default is empty
I
Igor Canadi 已提交
8055 8056
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(0));
S
Stanislau Hlebik 已提交
8057
    // First 4 keys goes to separate SSTs + 1 more SST for 2 smaller keys
I
Igor Canadi 已提交
8058 8059
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(5));
S
Stanislau Hlebik 已提交
8060
    // 1 SST for big key + 1 SST for small one
I
Igor Canadi 已提交
8061 8062
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(2));
S
Stanislau Hlebik 已提交
8063
    // 1 SST for all keys
I
Igor Canadi 已提交
8064 8065
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(1));
S
Stanislau Hlebik 已提交
8066 8067 8068 8069 8070 8071 8072 8073
  }
}

// In https://reviews.facebook.net/D20661 we change
// recovery behavior: previously for each log file each column family
// memtable was flushed, even it wasn't empty. Now it's changed:
// we try to create the smallest number of table files by merging
// updates from multiple logs
I
Igor Sugak 已提交
8074
TEST_F(DBTest, RecoverCheckFileAmount) {
8075
  Options options = CurrentOptions();
S
Stanislau Hlebik 已提交
8076
  options.write_buffer_size = 100000;
L
Lei Jin 已提交
8077
  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
S
Stanislau Hlebik 已提交
8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089

  ASSERT_OK(Put(0, Key(1), DummyString(1)));
  ASSERT_OK(Put(1, Key(1), DummyString(1)));
  ASSERT_OK(Put(2, Key(1), DummyString(1)));

  // Make 'nikitich' memtable to be flushed
  ASSERT_OK(Put(3, Key(10), DummyString(1002400)));
  ASSERT_OK(Put(3, Key(1), DummyString(1)));
  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
  // 4 memtable are not flushed, 1 sst file
  {
    auto tables = ListTableFiles(env_, dbname_);
I
Igor Canadi 已提交
8090 8091 8092
    ASSERT_EQ(tables.size(), static_cast<size_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(1));
S
Stanislau Hlebik 已提交
8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113
  }
  // Memtable for 'nikitich' has flushed, new WAL file has opened
  // 4 memtable still not flushed

  // Write to new WAL file
  ASSERT_OK(Put(0, Key(1), DummyString(1)));
  ASSERT_OK(Put(1, Key(1), DummyString(1)));
  ASSERT_OK(Put(2, Key(1), DummyString(1)));

  // Fill up 'nikitich' one more time
  ASSERT_OK(Put(3, Key(10), DummyString(1002400)));
  // make it flush
  ASSERT_OK(Put(3, Key(1), DummyString(1)));
  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
  // There are still 4 memtable not flushed, and 2 sst tables
  ASSERT_OK(Put(0, Key(1), DummyString(1)));
  ASSERT_OK(Put(1, Key(1), DummyString(1)));
  ASSERT_OK(Put(2, Key(1), DummyString(1)));

  {
    auto tables = ListTableFiles(env_, dbname_);
I
Igor Canadi 已提交
8114 8115 8116
    ASSERT_EQ(tables.size(), static_cast<size_t>(2));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(2));
S
Stanislau Hlebik 已提交
8117 8118 8119
  }

  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
L
Lei Jin 已提交
8120
                           options);
S
Stanislau Hlebik 已提交
8121 8122 8123 8124 8125 8126
  {
    std::vector<uint64_t> table_files = ListTableFiles(env_, dbname_);
    // Check, that records for 'default', 'dobrynia' and 'pikachu' from
    // first, second and third WALs  went to the same SST.
    // So, there is 6 SSTs: three  for 'nikitich', one for 'default', one for
    // 'dobrynia', one for 'pikachu'
I
Igor Canadi 已提交
8127 8128 8129 8130 8131 8132 8133 8134
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(3));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(1));
S
Stanislau Hlebik 已提交
8135 8136 8137
  }
}

I
Igor Sugak 已提交
8138
TEST_F(DBTest, SharedWriteBuffer) {
8139
  Options options = CurrentOptions();
8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217
  options.db_write_buffer_size = 100000;  // this is the real limit
  options.write_buffer_size    = 500000;  // this is never hit
  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);

  // Trigger a flush on every CF
  ASSERT_OK(Put(0, Key(1), DummyString(1)));
  ASSERT_OK(Put(1, Key(1), DummyString(1)));
  ASSERT_OK(Put(3, Key(1), DummyString(90000)));
  ASSERT_OK(Put(2, Key(2), DummyString(20000)));
  ASSERT_OK(Put(2, Key(1), DummyString(1)));
  dbfull()->TEST_WaitForFlushMemTable(handles_[0]);
  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(1));
  }

  // Flush 'dobrynia' and 'nikitich'
  ASSERT_OK(Put(2, Key(2), DummyString(50000)));
  ASSERT_OK(Put(3, Key(2), DummyString(40000)));
  ASSERT_OK(Put(2, Key(3), DummyString(20000)));
  ASSERT_OK(Put(3, Key(2), DummyString(40000)));
  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(2));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(2));
  }

  // Make 'dobrynia' and 'nikitich' both take up 40% of space
  // When 'pikachu' puts us over 100%, all 3 flush.
  ASSERT_OK(Put(2, Key(2), DummyString(40000)));
  ASSERT_OK(Put(1, Key(2), DummyString(20000)));
  ASSERT_OK(Put(0, Key(1), DummyString(1)));
  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(2));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(3));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(3));
  }

  // Some remaining writes so 'default' and 'nikitich' flush on closure.
  ASSERT_OK(Put(3, Key(1), DummyString(1)));
  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
                           options);
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(2));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(2));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(3));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(4));
  }
}

I
Igor Sugak 已提交
8218
TEST_F(DBTest, PurgeInfoLogs) {
8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229
  Options options = CurrentOptions();
  options.keep_log_file_num = 5;
  options.create_if_missing = true;
  for (int mode = 0; mode <= 1; mode++) {
    if (mode == 1) {
      options.db_log_dir = dbname_ + "_logs";
      env_->CreateDirIfMissing(options.db_log_dir);
    } else {
      options.db_log_dir = "";
    }
    for (int i = 0; i < 8; i++) {
L
Lei Jin 已提交
8230
      Reopen(options);
8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242
    }

    std::vector<std::string> files;
    env_->GetChildren(options.db_log_dir.empty() ? dbname_ : options.db_log_dir,
                      &files);
    int info_log_count = 0;
    for (std::string file : files) {
      if (file.find("LOG") != std::string::npos) {
        info_log_count++;
      }
    }
    ASSERT_EQ(5, info_log_count);
8243

L
Lei Jin 已提交
8244
    Destroy(options);
I
Igor Canadi 已提交
8245
    // For mode (1), test DestroyDB() to delete all the logs under DB dir.
8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260
    // For mode (2), no info log file should have been put under DB dir.
    std::vector<std::string> db_files;
    env_->GetChildren(dbname_, &db_files);
    for (std::string file : db_files) {
      ASSERT_TRUE(file.find("LOG") == std::string::npos);
    }

    if (mode == 1) {
      // Cleaning up
      env_->GetChildren(options.db_log_dir, &files);
      for (std::string file : files) {
        env_->DeleteFile(options.db_log_dir + "/" + file);
      }
      env_->DeleteDir(options.db_log_dir);
    }
8261 8262 8263
  }
}

I
Igor Canadi 已提交
8264
namespace {
8265 8266 8267 8268
SequenceNumber ReadRecords(
    std::unique_ptr<TransactionLogIterator>& iter,
    int& count) {
  count = 0;
8269
  SequenceNumber lastSequence = 0;
8270
  BatchResult res;
8271
  while (iter->Valid()) {
8272
    res = iter->GetBatch();
8273
    EXPECT_TRUE(res.sequence > lastSequence);
8274
    ++count;
8275
    lastSequence = res.sequence;
8276
    EXPECT_OK(iter->status());
8277 8278
    iter->Next();
  }
8279 8280 8281 8282 8283 8284 8285 8286 8287
  return res.sequence;
}

void ExpectRecords(
    const int expected_no_records,
    std::unique_ptr<TransactionLogIterator>& iter) {
  int num_records;
  ReadRecords(iter, num_records);
  ASSERT_EQ(num_records, expected_no_records);
8288
}
I
Igor Canadi 已提交
8289
}  // namespace
8290

I
Igor Sugak 已提交
8291
TEST_F(DBTest, TransactionLogIterator) {
8292 8293
  do {
    Options options = OptionsForLogIterTest();
L
Lei Jin 已提交
8294
    DestroyAndReopen(options);
L
Lei Jin 已提交
8295
    CreateAndReopenWithCF({"pikachu"}, options);
8296 8297 8298
    Put(0, "key1", DummyString(1024));
    Put(1, "key2", DummyString(1024));
    Put(1, "key2", DummyString(1024));
8299 8300 8301 8302 8303
    ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3U);
    {
      auto iter = OpenTransactionLogIter(0);
      ExpectRecords(3, iter);
    }
L
Lei Jin 已提交
8304
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
8305 8306 8307 8308 8309
    env_->SleepForMicroseconds(2 * 1000 * 1000);
    {
      Put(0, "key4", DummyString(1024));
      Put(1, "key5", DummyString(1024));
      Put(0, "key6", DummyString(1024));
8310 8311 8312 8313 8314 8315
    }
    {
      auto iter = OpenTransactionLogIter(0);
      ExpectRecords(6, iter);
    }
  } while (ChangeCompactOptions());
8316 8317
}

8318
#ifndef NDEBUG // sync point is not included with DNDEBUG build
I
Igor Sugak 已提交
8319
TEST_F(DBTest, TransactionLogIteratorRace) {
8320
  static const int LOG_ITERATOR_RACE_TEST_COUNT = 2;
I
Igor Canadi 已提交
8321 8322 8323 8324 8325 8326 8327
  static const char* sync_points[LOG_ITERATOR_RACE_TEST_COUNT][4] = {
      {"WalManager::GetSortedWalFiles:1",  "WalManager::PurgeObsoleteFiles:1",
       "WalManager::PurgeObsoleteFiles:2", "WalManager::GetSortedWalFiles:2"},
      {"WalManager::GetSortedWalsOfType:1",
       "WalManager::PurgeObsoleteFiles:1",
       "WalManager::PurgeObsoleteFiles:2",
       "WalManager::GetSortedWalsOfType:2"}};
8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339
  for (int test = 0; test < LOG_ITERATOR_RACE_TEST_COUNT; ++test) {
    // Setup sync point dependency to reproduce the race condition of
    // a log file moved to archived dir, in the middle of GetSortedWalFiles
    rocksdb::SyncPoint::GetInstance()->LoadDependency(
      { { sync_points[test][0], sync_points[test][1] },
        { sync_points[test][2], sync_points[test][3] },
      });

    do {
      rocksdb::SyncPoint::GetInstance()->ClearTrace();
      rocksdb::SyncPoint::GetInstance()->DisableProcessing();
      Options options = OptionsForLogIterTest();
L
Lei Jin 已提交
8340
      DestroyAndReopen(options);
8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353
      Put("key1", DummyString(1024));
      dbfull()->Flush(FlushOptions());
      Put("key2", DummyString(1024));
      dbfull()->Flush(FlushOptions());
      Put("key3", DummyString(1024));
      dbfull()->Flush(FlushOptions());
      Put("key4", DummyString(1024));
      ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4U);

      {
        auto iter = OpenTransactionLogIter(0);
        ExpectRecords(4, iter);
      }
8354

8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371
      rocksdb::SyncPoint::GetInstance()->EnableProcessing();
      // trigger async flush, and log move. Well, log move will
      // wait until the GetSortedWalFiles:1 to reproduce the race
      // condition
      FlushOptions flush_options;
      flush_options.wait = false;
      dbfull()->Flush(flush_options);

      // "key5" would be written in a new memtable and log
      Put("key5", DummyString(1024));
      {
        // this iter would miss "key4" if not fixed
        auto iter = OpenTransactionLogIter(0);
        ExpectRecords(5, iter);
      }
    } while (ChangeCompactOptions());
  }
8372
}
8373
#endif
8374

I
Igor Sugak 已提交
8375
TEST_F(DBTest, TransactionLogIteratorStallAtLastRecord) {
8376 8377
  do {
    Options options = OptionsForLogIterTest();
L
Lei Jin 已提交
8378
    DestroyAndReopen(options);
8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390
    Put("key1", DummyString(1024));
    auto iter = OpenTransactionLogIter(0);
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    iter->Next();
    ASSERT_TRUE(!iter->Valid());
    ASSERT_OK(iter->status());
    Put("key2", DummyString(1024));
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
  } while (ChangeCompactOptions());
8391 8392
}

I
Igor Sugak 已提交
8393
TEST_F(DBTest, TransactionLogIteratorCheckAfterRestart) {
8394 8395
  do {
    Options options = OptionsForLogIterTest();
L
Lei Jin 已提交
8396
    DestroyAndReopen(options);
8397 8398 8399
    Put("key1", DummyString(1024));
    Put("key2", DummyString(1023));
    dbfull()->Flush(FlushOptions());
L
Lei Jin 已提交
8400
    Reopen(options);
8401 8402 8403
    auto iter = OpenTransactionLogIter(0);
    ExpectRecords(2, iter);
  } while (ChangeCompactOptions());
8404 8405
}

I
Igor Sugak 已提交
8406
TEST_F(DBTest, TransactionLogIteratorCorruptedLog) {
8407 8408
  do {
    Options options = OptionsForLogIterTest();
L
Lei Jin 已提交
8409
    DestroyAndReopen(options);
8410
    for (int i = 0; i < 1024; i++) {
8411
      Put("key"+ToString(i), DummyString(10));
8412 8413 8414 8415 8416
    }
    dbfull()->Flush(FlushOptions());
    // Corrupt this log to create a gap
    rocksdb::VectorLogPtr wal_files;
    ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
8417 8418 8419 8420 8421 8422 8423 8424
    const auto logfile_path = dbname_ + "/" + wal_files.front()->PathName();
    if (mem_env_) {
      mem_env_->Truncate(logfile_path, wal_files.front()->SizeFileBytes() / 2);
    } else {
      ASSERT_EQ(0, truncate(logfile_path.c_str(),
                   wal_files.front()->SizeFileBytes() / 2));
    }

8425 8426 8427 8428 8429 8430
    // Insert a new entry to a new log file
    Put("key1025", DummyString(10));
    // Try to read from the beginning. Should stop before the gap and read less
    // than 1025 entries
    auto iter = OpenTransactionLogIter(0);
    int count;
8431 8432
    SequenceNumber last_sequence_read = ReadRecords(iter, count);
    ASSERT_LT(last_sequence_read, 1025U);
8433 8434 8435 8436 8437 8438
    // Try to read past the gap, should be able to seek to key1025
    auto iter2 = OpenTransactionLogIter(last_sequence_read + 1);
    ExpectRecords(1, iter2);
  } while (ChangeCompactOptions());
}

I
Igor Sugak 已提交
8439
TEST_F(DBTest, TransactionLogIteratorBatchOperations) {
8440 8441
  do {
    Options options = OptionsForLogIterTest();
L
Lei Jin 已提交
8442
    DestroyAndReopen(options);
L
Lei Jin 已提交
8443
    CreateAndReopenWithCF({"pikachu"}, options);
8444
    WriteBatch batch;
I
Igor Canadi 已提交
8445 8446 8447 8448
    batch.Put(handles_[1], "key1", DummyString(1024));
    batch.Put(handles_[0], "key2", DummyString(1024));
    batch.Put(handles_[1], "key3", DummyString(1024));
    batch.Delete(handles_[0], "key2");
8449
    dbfull()->Write(WriteOptions(), &batch);
8450 8451
    Flush(1);
    Flush(0);
L
Lei Jin 已提交
8452
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
8453
    Put(1, "key4", DummyString(1024));
8454
    auto iter = OpenTransactionLogIter(3);
8455
    ExpectRecords(2, iter);
8456
  } while (ChangeCompactOptions());
8457 8458
}

I
Igor Sugak 已提交
8459
TEST_F(DBTest, TransactionLogIteratorBlobs) {
J
Jim Paton 已提交
8460
  Options options = OptionsForLogIterTest();
L
Lei Jin 已提交
8461
  DestroyAndReopen(options);
L
Lei Jin 已提交
8462
  CreateAndReopenWithCF({"pikachu"}, options);
J
Jim Paton 已提交
8463 8464
  {
    WriteBatch batch;
I
Igor Canadi 已提交
8465 8466
    batch.Put(handles_[1], "key1", DummyString(1024));
    batch.Put(handles_[0], "key2", DummyString(1024));
J
Jim Paton 已提交
8467
    batch.PutLogData(Slice("blob1"));
I
Igor Canadi 已提交
8468
    batch.Put(handles_[1], "key3", DummyString(1024));
J
Jim Paton 已提交
8469
    batch.PutLogData(Slice("blob2"));
I
Igor Canadi 已提交
8470
    batch.Delete(handles_[0], "key2");
J
Jim Paton 已提交
8471
    dbfull()->Write(WriteOptions(), &batch);
L
Lei Jin 已提交
8472
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
J
Jim Paton 已提交
8473 8474 8475 8476 8477
  }

  auto res = OpenTransactionLogIter(0)->GetBatch();
  struct Handler : public WriteBatch::Handler {
    std::string seen;
I
Igor Sugak 已提交
8478 8479
    virtual Status PutCF(uint32_t cf, const Slice& key,
                         const Slice& value) override {
8480 8481
      seen += "Put(" + ToString(cf) + ", " + key.ToString() + ", " +
              ToString(value.size()) + ")";
8482
      return Status::OK();
J
Jim Paton 已提交
8483
    }
I
Igor Sugak 已提交
8484 8485
    virtual Status MergeCF(uint32_t cf, const Slice& key,
                           const Slice& value) override {
8486 8487
      seen += "Merge(" + ToString(cf) + ", " + key.ToString() + ", " +
              ToString(value.size()) + ")";
8488
      return Status::OK();
J
Jim Paton 已提交
8489
    }
I
Igor Sugak 已提交
8490
    virtual void LogData(const Slice& blob) override {
J
Jim Paton 已提交
8491 8492
      seen += "LogData(" + blob.ToString() + ")";
    }
I
Igor Sugak 已提交
8493
    virtual Status DeleteCF(uint32_t cf, const Slice& key) override {
8494
      seen += "Delete(" + ToString(cf) + ", " + key.ToString() + ")";
8495
      return Status::OK();
J
Jim Paton 已提交
8496 8497 8498
    }
  } handler;
  res.writeBatchPtr->Iterate(&handler);
8499 8500 8501 8502 8503 8504 8505 8506
  ASSERT_EQ(
      "Put(1, key1, 1024)"
      "Put(0, key2, 1024)"
      "LogData(blob1)"
      "Put(1, key3, 1024)"
      "LogData(blob2)"
      "Delete(0, key2)",
      handler.seen);
J
Jim Paton 已提交
8507 8508
}

8509 8510 8511
// Multi-threaded test:
namespace {

8512 8513
static const int kColumnFamilies = 10;
static const int kNumThreads = 10;
8514 8515 8516 8517 8518
static const int kTestSeconds = 10;
static const int kNumKeys = 1000;

struct MTState {
  DBTest* test;
I
Igor Canadi 已提交
8519 8520 8521
  std::atomic<bool> stop;
  std::atomic<int> counter[kNumThreads];
  std::atomic<bool> thread_done[kNumThreads];
8522 8523 8524 8525 8526 8527 8528 8529 8530
};

struct MTThread {
  MTState* state;
  int id;
};

static void MTThreadBody(void* arg) {
  MTThread* t = reinterpret_cast<MTThread*>(arg);
8531
  int id = t->id;
8532
  DB* db = t->state->test->db_;
I
Igor Canadi 已提交
8533
  int counter = 0;
8534 8535
  fprintf(stderr, "... starting thread %d\n", id);
  Random rnd(1000 + id);
8536
  char valbuf[1500];
I
Igor Canadi 已提交
8537 8538
  while (t->state->stop.load(std::memory_order_acquire) == false) {
    t->state->counter[id].store(counter, std::memory_order_release);
8539 8540 8541 8542 8543 8544

    int key = rnd.Uniform(kNumKeys);
    char keybuf[20];
    snprintf(keybuf, sizeof(keybuf), "%016d", key);

    if (rnd.OneIn(2)) {
8545 8546
      // Write values of the form <key, my id, counter, cf, unique_id>.
      // into each of the CFs
8547
      // We add some padding for force compactions.
8548
      int unique_id = rnd.Uniform(1000000);
8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567

      // Half of the time directly use WriteBatch. Half of the time use
      // WriteBatchWithIndex.
      if (rnd.OneIn(2)) {
        WriteBatch batch;
        for (int cf = 0; cf < kColumnFamilies; ++cf) {
          snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
                   static_cast<int>(counter), cf, unique_id);
          batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
        }
        ASSERT_OK(db->Write(WriteOptions(), &batch));
      } else {
        WriteBatchWithIndex batch(db->GetOptions().comparator);
        for (int cf = 0; cf < kColumnFamilies; ++cf) {
          snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
                   static_cast<int>(counter), cf, unique_id);
          batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
        }
        ASSERT_OK(db->Write(WriteOptions(), batch.GetWriteBatch()));
8568
      }
8569
    } else {
8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583
      // Read a value and verify that it matches the pattern written above
      // and that writes to all column families were atomic (unique_id is the
      // same)
      std::vector<Slice> keys(kColumnFamilies, Slice(keybuf));
      std::vector<std::string> values;
      std::vector<Status> statuses =
          db->MultiGet(ReadOptions(), t->state->test->handles_, keys, &values);
      Status s = statuses[0];
      // all statuses have to be the same
      for (size_t i = 1; i < statuses.size(); ++i) {
        // they are either both ok or both not-found
        ASSERT_TRUE((s.ok() && statuses[i].ok()) ||
                    (s.IsNotFound() && statuses[i].IsNotFound()));
      }
8584 8585 8586 8587 8588
      if (s.IsNotFound()) {
        // Key has not yet been written
      } else {
        // Check that the writer thread counter is >= the counter in the value
        ASSERT_OK(s);
8589 8590 8591 8592 8593 8594 8595 8596 8597
        int unique_id = -1;
        for (int i = 0; i < kColumnFamilies; ++i) {
          int k, w, c, cf, u;
          ASSERT_EQ(5, sscanf(values[i].c_str(), "%d.%d.%d.%d.%d", &k, &w,
                              &c, &cf, &u))
              << values[i];
          ASSERT_EQ(k, key);
          ASSERT_GE(w, 0);
          ASSERT_LT(w, kNumThreads);
I
Igor Canadi 已提交
8598
          ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire));
8599 8600 8601 8602 8603 8604 8605 8606 8607
          ASSERT_EQ(cf, i);
          if (i == 0) {
            unique_id = u;
          } else {
            // this checks that updates across column families happened
            // atomically -- all unique ids are the same
            ASSERT_EQ(u, unique_id);
          }
        }
8608 8609 8610 8611
      }
    }
    counter++;
  }
I
Igor Canadi 已提交
8612
  t->state->thread_done[id].store(true, std::memory_order_release);
8613
  fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter));
8614 8615
}

H
Hans Wennborg 已提交
8616
}  // namespace
8617

8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635
class MultiThreadedDBTest : public DBTest,
                            public ::testing::WithParamInterface<int> {
 public:
  virtual void SetUp() override { option_config_ = GetParam(); }

  static std::vector<int> GenerateOptionConfigs() {
    std::vector<int> optionConfigs;
    for (int optionConfig = kDefault; optionConfig < kEnd; ++optionConfig) {
      // skip as HashCuckooRep does not support snapshot
      if (optionConfig != kHashCuckoo) {
        optionConfigs.push_back(optionConfig);
      }
    }
    return optionConfigs;
  }
};

TEST_P(MultiThreadedDBTest, MultiThreaded) {
8636 8637
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650
  std::vector<std::string> cfs;
  for (int i = 1; i < kColumnFamilies; ++i) {
    cfs.push_back(ToString(i));
  }
  CreateAndReopenWithCF(cfs, CurrentOptions(options_override));
  // Initialize state
  MTState mt;
  mt.test = this;
  mt.stop.store(false, std::memory_order_release);
  for (int id = 0; id < kNumThreads; id++) {
    mt.counter[id].store(0, std::memory_order_release);
    mt.thread_done[id].store(false, std::memory_order_release);
  }
8651

8652 8653 8654 8655 8656 8657 8658
  // Start threads
  MTThread thread[kNumThreads];
  for (int id = 0; id < kNumThreads; id++) {
    thread[id].state = &mt;
    thread[id].id = id;
    env_->StartThread(MTThreadBody, &thread[id]);
  }
8659

8660 8661
  // Let them run for a while
  env_->SleepForMicroseconds(kTestSeconds * 1000000);
8662

8663 8664 8665 8666 8667
  // Stop the threads and wait for them to finish
  mt.stop.store(true, std::memory_order_release);
  for (int id = 0; id < kNumThreads; id++) {
    while (mt.thread_done[id].load(std::memory_order_acquire) == false) {
      env_->SleepForMicroseconds(100000);
8668
    }
8669
  }
8670 8671
}

8672 8673 8674 8675
INSTANTIATE_TEST_CASE_P(
    MultiThreaded, MultiThreadedDBTest,
    ::testing::ValuesIn(MultiThreadedDBTest::GenerateOptionConfigs()));

8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694
// Group commit test:
namespace {

static const int kGCNumThreads = 4;
static const int kGCNumKeys = 1000;

struct GCThread {
  DB* db;
  int id;
  std::atomic<bool> done;
};

static void GCThreadBody(void* arg) {
  GCThread* t = reinterpret_cast<GCThread*>(arg);
  int id = t->id;
  DB* db = t->db;
  WriteOptions wo;

  for (int i = 0; i < kGCNumKeys; ++i) {
8695
    std::string kv(ToString(i + id * kGCNumKeys));
8696 8697 8698 8699 8700 8701 8702
    ASSERT_OK(db->Put(wo, kv, kv));
  }
  t->done = true;
}

}  // namespace

I
Igor Sugak 已提交
8703
TEST_F(DBTest, GroupCommitTest) {
8704
  do {
8705
    Options options = CurrentOptions();
8706 8707
    options.env = env_;
    env_->log_write_slowdown_.store(100);
8708
    options.statistics = rocksdb::CreateDBStatistics();
L
Lei Jin 已提交
8709
    Reopen(options);
8710

8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724
    // Start threads
    GCThread thread[kGCNumThreads];
    for (int id = 0; id < kGCNumThreads; id++) {
      thread[id].id = id;
      thread[id].db = db_;
      thread[id].done = false;
      env_->StartThread(GCThreadBody, &thread[id]);
    }

    for (int id = 0; id < kGCNumThreads; id++) {
      while (thread[id].done == false) {
        env_->SleepForMicroseconds(100000);
      }
    }
8725 8726
    env_->log_write_slowdown_.store(0);

8727
    ASSERT_GT(TestGetTickerCount(options, WRITE_DONE_BY_OTHER), 0);
8728 8729 8730

    std::vector<std::string> expected_db;
    for (int i = 0; i < kGCNumThreads * kGCNumKeys; ++i) {
8731
      expected_db.push_back(ToString(i));
8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743
    }
    sort(expected_db.begin(), expected_db.end());

    Iterator* itr = db_->NewIterator(ReadOptions());
    itr->SeekToFirst();
    for (auto x : expected_db) {
      ASSERT_TRUE(itr->Valid());
      ASSERT_EQ(itr->key().ToString(), x);
      ASSERT_EQ(itr->value().ToString(), x);
      itr->Next();
    }
    ASSERT_TRUE(!itr->Valid());
I
Igor Canadi 已提交
8744
    delete itr;
8745

8746 8747 8748
    HistogramData hist_data = {0};
    options.statistics->histogramData(DB_WRITE, &hist_data);
    ASSERT_GT(hist_data.average, 0.0);
8749
  } while (ChangeOptions(kSkipNoSeekToLast));
8750 8751
}

8752 8753 8754 8755
namespace {
typedef std::map<std::string, std::string> KVMap;
}

J
jorlow@chromium.org 已提交
8756 8757
class ModelDB: public DB {
 public:
8758 8759 8760
  class ModelSnapshot : public Snapshot {
   public:
    KVMap map_;
I
Igor Canadi 已提交
8761

I
Igor Sugak 已提交
8762
    virtual SequenceNumber GetSequenceNumber() const override {
I
Igor Canadi 已提交
8763 8764 8765 8766
      // no need to call this
      assert(false);
      return 0;
    }
8767 8768
  };

8769
  explicit ModelDB(const Options& options) : options_(options) {}
8770
  using DB::Put;
8771
  virtual Status Put(const WriteOptions& o, ColumnFamilyHandle* cf,
I
Igor Sugak 已提交
8772
                     const Slice& k, const Slice& v) override {
8773
    WriteBatch batch;
I
Igor Canadi 已提交
8774
    batch.Put(cf, k, v);
8775
    return Write(o, &batch);
8776 8777
  }
  using DB::Merge;
8778
  virtual Status Merge(const WriteOptions& o, ColumnFamilyHandle* cf,
I
Igor Sugak 已提交
8779
                       const Slice& k, const Slice& v) override {
8780
    WriteBatch batch;
I
Igor Canadi 已提交
8781
    batch.Merge(cf, k, v);
8782
    return Write(o, &batch);
8783 8784
  }
  using DB::Delete;
8785
  virtual Status Delete(const WriteOptions& o, ColumnFamilyHandle* cf,
I
Igor Sugak 已提交
8786
                        const Slice& key) override {
8787
    WriteBatch batch;
I
Igor Canadi 已提交
8788
    batch.Delete(cf, key);
8789
    return Write(o, &batch);
8790 8791
  }
  using DB::Get;
8792
  virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* cf,
I
Igor Sugak 已提交
8793
                     const Slice& key, std::string* value) override {
8794 8795 8796
    return Status::NotSupported(key);
  }

8797 8798 8799
  using DB::MultiGet;
  virtual std::vector<Status> MultiGet(
      const ReadOptions& options,
8800
      const std::vector<ColumnFamilyHandle*>& column_family,
I
Igor Sugak 已提交
8801 8802
      const std::vector<Slice>& keys,
      std::vector<std::string>* values) override {
8803 8804 8805
    std::vector<Status> s(keys.size(),
                          Status::NotSupported("Not implemented."));
    return s;
J
jorlow@chromium.org 已提交
8806
  }
8807

I
Igor Canadi 已提交
8808
  using DB::GetPropertiesOfAllTables;
I
Igor Sugak 已提交
8809 8810 8811
  virtual Status GetPropertiesOfAllTables(
      ColumnFamilyHandle* column_family,
      TablePropertiesCollection* props) override {
8812 8813
    return Status();
  }
8814

8815
  using DB::KeyMayExist;
8816
  virtual bool KeyMayExist(const ReadOptions& options,
8817
                           ColumnFamilyHandle* column_family, const Slice& key,
I
Igor Sugak 已提交
8818 8819
                           std::string* value,
                           bool* value_found = nullptr) override {
8820 8821 8822
    if (value_found != nullptr) {
      *value_found = false;
    }
8823 8824
    return true; // Not Supported directly
  }
8825 8826
  using DB::NewIterator;
  virtual Iterator* NewIterator(const ReadOptions& options,
I
Igor Sugak 已提交
8827
                                ColumnFamilyHandle* column_family) override {
A
Abhishek Kona 已提交
8828
    if (options.snapshot == nullptr) {
J
jorlow@chromium.org 已提交
8829 8830 8831 8832 8833
      KVMap* saved = new KVMap;
      *saved = map_;
      return new ModelIter(saved, true);
    } else {
      const KVMap* snapshot_state =
8834
          &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
J
jorlow@chromium.org 已提交
8835 8836 8837
      return new ModelIter(snapshot_state, false);
    }
  }
8838 8839
  virtual Status NewIterators(
      const ReadOptions& options,
8840
      const std::vector<ColumnFamilyHandle*>& column_family,
I
Igor Sugak 已提交
8841
      std::vector<Iterator*>* iterators) override {
8842 8843
    return Status::NotSupported("Not supported yet");
  }
I
Igor Sugak 已提交
8844
  virtual const Snapshot* GetSnapshot() override {
8845 8846 8847
    ModelSnapshot* snapshot = new ModelSnapshot;
    snapshot->map_ = map_;
    return snapshot;
J
jorlow@chromium.org 已提交
8848 8849
  }

I
Igor Sugak 已提交
8850
  virtual void ReleaseSnapshot(const Snapshot* snapshot) override {
8851
    delete reinterpret_cast<const ModelSnapshot*>(snapshot);
J
jorlow@chromium.org 已提交
8852
  }
8853

I
Igor Sugak 已提交
8854 8855
  virtual Status Write(const WriteOptions& options,
                       WriteBatch* batch) override {
8856 8857 8858
    class Handler : public WriteBatch::Handler {
     public:
      KVMap* map_;
I
Igor Sugak 已提交
8859
      virtual void Put(const Slice& key, const Slice& value) override {
8860
        (*map_)[key.ToString()] = value.ToString();
J
jorlow@chromium.org 已提交
8861
      }
I
Igor Sugak 已提交
8862
      virtual void Merge(const Slice& key, const Slice& value) override {
8863 8864 8865
        // ignore merge for now
        //(*map_)[key.ToString()] = value.ToString();
      }
I
Igor Sugak 已提交
8866
      virtual void Delete(const Slice& key) override {
8867 8868 8869 8870 8871 8872
        map_->erase(key.ToString());
      }
    };
    Handler handler;
    handler.map_ = &map_;
    return batch->Iterate(&handler);
J
jorlow@chromium.org 已提交
8873 8874
  }

8875
  using DB::GetProperty;
8876
  virtual bool GetProperty(ColumnFamilyHandle* column_family,
I
Igor Sugak 已提交
8877
                           const Slice& property, std::string* value) override {
J
jorlow@chromium.org 已提交
8878 8879
    return false;
  }
8880 8881 8882 8883 8884
  using DB::GetIntProperty;
  virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
                              const Slice& property, uint64_t* value) override {
    return false;
  }
8885
  using DB::GetApproximateSizes;
8886
  virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
I
Igor Sugak 已提交
8887 8888
                                   const Range* range, int n,
                                   uint64_t* sizes) override {
J
jorlow@chromium.org 已提交
8889 8890 8891 8892
    for (int i = 0; i < n; i++) {
      sizes[i] = 0;
    }
  }
8893
  using DB::CompactRange;
8894
  virtual Status CompactRange(ColumnFamilyHandle* column_family,
8895
                              const Slice* start, const Slice* end,
8896
                              bool reduce_level, int target_level,
I
Igor Sugak 已提交
8897
                              uint32_t output_path_id) override {
L
Lei Jin 已提交
8898
    return Status::NotSupported("Not supported operation.");
G
Gabor Cselle 已提交
8899 8900
  }

8901 8902 8903 8904 8905 8906 8907 8908 8909
  using DB::CompactFiles;
  virtual Status CompactFiles(
      const CompactionOptions& compact_options,
      ColumnFamilyHandle* column_family,
      const std::vector<std::string>& input_file_names,
      const int output_level, const int output_path_id = -1) override {
    return Status::NotSupported("Not supported operation.");
  }

8910
  using DB::NumberLevels;
I
Igor Sugak 已提交
8911 8912 8913
  virtual int NumberLevels(ColumnFamilyHandle* column_family) override {
    return 1;
  }
8914

8915
  using DB::MaxMemCompactionLevel;
I
Igor Sugak 已提交
8916 8917
  virtual int MaxMemCompactionLevel(
      ColumnFamilyHandle* column_family) override {
8918
    return 1;
8919 8920
  }

8921
  using DB::Level0StopWriteTrigger;
I
Igor Sugak 已提交
8922 8923
  virtual int Level0StopWriteTrigger(
      ColumnFamilyHandle* column_family) override {
8924
    return -1;
8925 8926
  }

I
Igor Sugak 已提交
8927
  virtual const std::string& GetName() const override { return name_; }
I
Igor Canadi 已提交
8928

I
Igor Sugak 已提交
8929
  virtual Env* GetEnv() const override { return nullptr; }
8930

8931
  using DB::GetOptions;
I
Igor Sugak 已提交
8932 8933
  virtual const Options& GetOptions(
      ColumnFamilyHandle* column_family) const override {
I
Igor Canadi 已提交
8934 8935 8936
    return options_;
  }

8937 8938 8939
  using DB::GetDBOptions;
  virtual const DBOptions& GetDBOptions() const override { return options_; }

8940 8941
  using DB::Flush;
  virtual Status Flush(const rocksdb::FlushOptions& options,
I
Igor Sugak 已提交
8942
                       ColumnFamilyHandle* column_family) override {
H
heyongqiang 已提交
8943 8944 8945 8946
    Status ret;
    return ret;
  }

I
Igor Sugak 已提交
8947 8948
  virtual Status DisableFileDeletions() override { return Status::OK(); }
  virtual Status EnableFileDeletions(bool force) override {
8949 8950
    return Status::OK();
  }
8951
  virtual Status GetLiveFiles(std::vector<std::string>&, uint64_t* size,
I
Igor Sugak 已提交
8952
                              bool flush_memtable = true) override {
8953 8954 8955
    return Status::OK();
  }

I
Igor Sugak 已提交
8956
  virtual Status GetSortedWalFiles(VectorLogPtr& files) override {
8957 8958 8959
    return Status::OK();
  }

I
Igor Sugak 已提交
8960
  virtual Status DeleteFile(std::string name) override { return Status::OK(); }
8961

8962
  virtual Status GetDbIdentity(std::string& identity) const override {
8963 8964 8965
    return Status::OK();
  }

I
Igor Sugak 已提交
8966
  virtual SequenceNumber GetLatestSequenceNumber() const override { return 0; }
8967 8968 8969
  virtual Status GetUpdatesSince(
      rocksdb::SequenceNumber, unique_ptr<rocksdb::TransactionLogIterator>*,
      const TransactionLogIterator::ReadOptions&
I
Igor Sugak 已提交
8970
          read_options = TransactionLogIterator::ReadOptions()) override {
8971 8972 8973
    return Status::NotSupported("Not supported in Model DB");
  }

I
Igor Sugak 已提交
8974 8975 8976
  virtual ColumnFamilyHandle* DefaultColumnFamily() const override {
    return nullptr;
  }
8977

8978 8979
  virtual void GetColumnFamilyMetaData(
      ColumnFamilyHandle* column_family,
I
Igor Sugak 已提交
8980
      ColumnFamilyMetaData* metadata) override {}
8981

J
jorlow@chromium.org 已提交
8982 8983 8984 8985 8986 8987 8988 8989 8990
 private:
  class ModelIter: public Iterator {
   public:
    ModelIter(const KVMap* map, bool owned)
        : map_(map), owned_(owned), iter_(map_->end()) {
    }
    ~ModelIter() {
      if (owned_) delete map_;
    }
I
Igor Sugak 已提交
8991 8992 8993
    virtual bool Valid() const override { return iter_ != map_->end(); }
    virtual void SeekToFirst() override { iter_ = map_->begin(); }
    virtual void SeekToLast() override {
J
jorlow@chromium.org 已提交
8994 8995 8996 8997 8998 8999
      if (map_->empty()) {
        iter_ = map_->end();
      } else {
        iter_ = map_->find(map_->rbegin()->first);
      }
    }
I
Igor Sugak 已提交
9000
    virtual void Seek(const Slice& k) override {
J
jorlow@chromium.org 已提交
9001 9002
      iter_ = map_->lower_bound(k.ToString());
    }
I
Igor Sugak 已提交
9003 9004
    virtual void Next() override { ++iter_; }
    virtual void Prev() override {
S
Stanislau Hlebik 已提交
9005 9006 9007 9008 9009 9010 9011
      if (iter_ == map_->begin()) {
        iter_ = map_->end();
        return;
      }
      --iter_;
    }

I
Igor Sugak 已提交
9012 9013 9014 9015
    virtual Slice key() const override { return iter_->first; }
    virtual Slice value() const override { return iter_->second; }
    virtual Status status() const override { return Status::OK(); }

J
jorlow@chromium.org 已提交
9016 9017 9018 9019 9020 9021 9022
   private:
    const KVMap* const map_;
    const bool owned_;  // Do we own map_
    KVMap::const_iterator iter_;
  };
  const Options options_;
  KVMap map_;
I
Igor Canadi 已提交
9023
  std::string name_ = "";
J
jorlow@chromium.org 已提交
9024 9025
};

J
Jim Paton 已提交
9026 9027 9028 9029 9030 9031 9032
static std::string RandomKey(Random* rnd, int minimum = 0) {
  int len;
  do {
    len = (rnd->OneIn(3)
           ? 1                // Short sometimes to encourage collisions
           : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
  } while (len < minimum);
J
jorlow@chromium.org 已提交
9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082
  return test::RandomKey(rnd, len);
}

static bool CompareIterators(int step,
                             DB* model,
                             DB* db,
                             const Snapshot* model_snap,
                             const Snapshot* db_snap) {
  ReadOptions options;
  options.snapshot = model_snap;
  Iterator* miter = model->NewIterator(options);
  options.snapshot = db_snap;
  Iterator* dbiter = db->NewIterator(options);
  bool ok = true;
  int count = 0;
  for (miter->SeekToFirst(), dbiter->SeekToFirst();
       ok && miter->Valid() && dbiter->Valid();
       miter->Next(), dbiter->Next()) {
    count++;
    if (miter->key().compare(dbiter->key()) != 0) {
      fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n",
              step,
              EscapeString(miter->key()).c_str(),
              EscapeString(dbiter->key()).c_str());
      ok = false;
      break;
    }

    if (miter->value().compare(dbiter->value()) != 0) {
      fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
              step,
              EscapeString(miter->key()).c_str(),
              EscapeString(miter->value()).c_str(),
              EscapeString(miter->value()).c_str());
      ok = false;
    }
  }

  if (ok) {
    if (miter->Valid() != dbiter->Valid()) {
      fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
              step, miter->Valid(), dbiter->Valid());
      ok = false;
    }
  }
  delete miter;
  delete dbiter;
  return ok;
}

I
Igor Sugak 已提交
9083
TEST_F(DBTest, Randomized) {
9084 9085
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
J
jorlow@chromium.org 已提交
9086
  Random rnd(test::RandomSeed());
S
Sanjay Ghemawat 已提交
9087
  do {
9088
    ModelDB model(CurrentOptions(options_override));
S
Sanjay Ghemawat 已提交
9089
    const int N = 10000;
A
Abhishek Kona 已提交
9090 9091
    const Snapshot* model_snap = nullptr;
    const Snapshot* db_snap = nullptr;
S
Sanjay Ghemawat 已提交
9092 9093 9094 9095
    std::string k, v;
    for (int step = 0; step < N; step++) {
      // TODO(sanjay): Test Get() works
      int p = rnd.Uniform(100);
J
Jim Paton 已提交
9096
      int minimum = 0;
9097
      if (option_config_ == kHashSkipList ||
9098
          option_config_ == kHashLinkList ||
9099
          option_config_ == kHashCuckoo ||
9100 9101 9102
          option_config_ == kPlainTableFirstBytePrefix ||
          option_config_ == kBlockBasedTableWithWholeKeyHashIndex ||
          option_config_ == kBlockBasedTableWithPrefixHashIndex) {
J
Jim Paton 已提交
9103 9104
        minimum = 1;
      }
S
Sanjay Ghemawat 已提交
9105
      if (p < 45) {                               // Put
J
Jim Paton 已提交
9106
        k = RandomKey(&rnd, minimum);
S
Sanjay Ghemawat 已提交
9107 9108 9109 9110 9111 9112 9113 9114
        v = RandomString(&rnd,
                         rnd.OneIn(20)
                         ? 100 + rnd.Uniform(100)
                         : rnd.Uniform(8));
        ASSERT_OK(model.Put(WriteOptions(), k, v));
        ASSERT_OK(db_->Put(WriteOptions(), k, v));

      } else if (p < 90) {                        // Delete
J
Jim Paton 已提交
9115
        k = RandomKey(&rnd, minimum);
S
Sanjay Ghemawat 已提交
9116 9117 9118 9119 9120 9121 9122 9123 9124
        ASSERT_OK(model.Delete(WriteOptions(), k));
        ASSERT_OK(db_->Delete(WriteOptions(), k));


      } else {                                    // Multi-element batch
        WriteBatch b;
        const int num = rnd.Uniform(8);
        for (int i = 0; i < num; i++) {
          if (i == 0 || !rnd.OneIn(10)) {
J
Jim Paton 已提交
9125
            k = RandomKey(&rnd, minimum);
S
Sanjay Ghemawat 已提交
9126 9127 9128 9129 9130 9131 9132 9133 9134 9135
          } else {
            // Periodically re-use the same key from the previous iter, so
            // we have multiple entries in the write batch for the same key
          }
          if (rnd.OneIn(2)) {
            v = RandomString(&rnd, rnd.Uniform(10));
            b.Put(k, v);
          } else {
            b.Delete(k);
          }
J
jorlow@chromium.org 已提交
9136
        }
S
Sanjay Ghemawat 已提交
9137 9138
        ASSERT_OK(model.Write(WriteOptions(), &b));
        ASSERT_OK(db_->Write(WriteOptions(), &b));
J
jorlow@chromium.org 已提交
9139 9140
      }

S
Sanjay Ghemawat 已提交
9141
      if ((step % 100) == 0) {
S
Stanislau Hlebik 已提交
9142 9143 9144 9145 9146 9147 9148 9149
        // For DB instances that use the hash index + block-based table, the
        // iterator will be invalid right when seeking a non-existent key, right
        // than return a key that is close to it.
        if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex &&
            option_config_ != kBlockBasedTableWithPrefixHashIndex) {
          ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
          ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
        }
9150

S
Sanjay Ghemawat 已提交
9151 9152 9153
        // Save a snapshot from each DB this time that we'll use next
        // time we compare things, to make sure the current state is
        // preserved with the snapshot
A
Abhishek Kona 已提交
9154 9155
        if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
        if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
J
jorlow@chromium.org 已提交
9156

L
Lei Jin 已提交
9157

9158
        auto options = CurrentOptions(options_override);
L
Lei Jin 已提交
9159
        Reopen(options);
A
Abhishek Kona 已提交
9160
        ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
J
jorlow@chromium.org 已提交
9161

S
Sanjay Ghemawat 已提交
9162 9163 9164
        model_snap = model.GetSnapshot();
        db_snap = db_->GetSnapshot();
      }
S
Stanislau Hlebik 已提交
9165 9166

      if ((step % 2000) == 0) {
9167
        fprintf(stderr,
S
Stanislau Hlebik 已提交
9168 9169 9170
                "DBTest.Randomized, option ID: %d, step: %d out of %d\n",
                option_config_, step, N);
      }
J
jorlow@chromium.org 已提交
9171
    }
A
Abhishek Kona 已提交
9172 9173
    if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
    if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
9174
    // skip cuckoo hash as it does not support snapshot.
S
Stanislau Hlebik 已提交
9175 9176
  } while (ChangeOptions(kSkipDeletesFilterFirst | kSkipNoSeekToLast |
                         kSkipHashCuckoo));
J
jorlow@chromium.org 已提交
9177 9178
}

I
Igor Sugak 已提交
9179
TEST_F(DBTest, MultiGetSimple) {
9180
  do {
L
Lei Jin 已提交
9181
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196
    ASSERT_OK(Put(1, "k1", "v1"));
    ASSERT_OK(Put(1, "k2", "v2"));
    ASSERT_OK(Put(1, "k3", "v3"));
    ASSERT_OK(Put(1, "k4", "v4"));
    ASSERT_OK(Delete(1, "k4"));
    ASSERT_OK(Put(1, "k5", "v5"));
    ASSERT_OK(Delete(1, "no_key"));

    std::vector<Slice> keys({"k1", "k2", "k3", "k4", "k5", "no_key"});

    std::vector<std::string> values(20, "Temporary data to be overwritten");
    std::vector<ColumnFamilyHandle*> cfs(keys.size(), handles_[1]);

    std::vector<Status> s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
    ASSERT_EQ(values.size(), keys.size());
9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208
    ASSERT_EQ(values[0], "v1");
    ASSERT_EQ(values[1], "v2");
    ASSERT_EQ(values[2], "v3");
    ASSERT_EQ(values[4], "v5");

    ASSERT_OK(s[0]);
    ASSERT_OK(s[1]);
    ASSERT_OK(s[2]);
    ASSERT_TRUE(s[3].IsNotFound());
    ASSERT_OK(s[4]);
    ASSERT_TRUE(s[5].IsNotFound());
  } while (ChangeCompactOptions());
9209 9210
}

I
Igor Sugak 已提交
9211
TEST_F(DBTest, MultiGetEmpty) {
9212
  do {
L
Lei Jin 已提交
9213
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
9214 9215 9216
    // Empty Key Set
    std::vector<Slice> keys;
    std::vector<std::string> values;
9217 9218 9219
    std::vector<ColumnFamilyHandle*> cfs;
    std::vector<Status> s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
    ASSERT_EQ(s.size(), 0U);
9220 9221

    // Empty Database, Empty Key Set
L
Lei Jin 已提交
9222 9223 9224
    Options options = CurrentOptions();
    options.create_if_missing = true;
    DestroyAndReopen(options);
L
Lei Jin 已提交
9225
    CreateAndReopenWithCF({"pikachu"}, options);
9226 9227
    s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
    ASSERT_EQ(s.size(), 0U);
9228 9229 9230 9231 9232

    // Empty Database, Search for Keys
    keys.resize(2);
    keys[0] = "a";
    keys[1] = "b";
9233 9234 9235
    cfs.push_back(handles_[0]);
    cfs.push_back(handles_[1]);
    s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
9236 9237 9238
    ASSERT_EQ((int)s.size(), 2);
    ASSERT_TRUE(s[0].IsNotFound() && s[1].IsNotFound());
  } while (ChangeCompactOptions());
9239 9240
}

I
Igor Canadi 已提交
9241
namespace {
T
Tyler Harter 已提交
9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263
void PrefixScanInit(DBTest *dbtest) {
  char buf[100];
  std::string keystr;
  const int small_range_sstfiles = 5;
  const int big_range_sstfiles = 5;

  // Generate 11 sst files with the following prefix ranges.
  // GROUP 0: [0,10]                              (level 1)
  // GROUP 1: [1,2], [2,3], [3,4], [4,5], [5, 6]  (level 0)
  // GROUP 2: [0,6], [0,7], [0,8], [0,9], [0,10]  (level 0)
  //
  // A seek with the previous API would do 11 random I/Os (to all the
  // files).  With the new API and a prefix filter enabled, we should
  // only do 2 random I/O, to the 2 files containing the key.

  // GROUP 0
  snprintf(buf, sizeof(buf), "%02d______:start", 0);
  keystr = std::string(buf);
  ASSERT_OK(dbtest->Put(keystr, keystr));
  snprintf(buf, sizeof(buf), "%02d______:end", 10);
  keystr = std::string(buf);
  ASSERT_OK(dbtest->Put(keystr, keystr));
9264
  dbtest->Flush();
T
Tyler Harter 已提交
9265 9266 9267 9268 9269 9270 9271 9272 9273 9274
  dbtest->dbfull()->CompactRange(nullptr, nullptr); // move to level 1

  // GROUP 1
  for (int i = 1; i <= small_range_sstfiles; i++) {
    snprintf(buf, sizeof(buf), "%02d______:start", i);
    keystr = std::string(buf);
    ASSERT_OK(dbtest->Put(keystr, keystr));
    snprintf(buf, sizeof(buf), "%02d______:end", i+1);
    keystr = std::string(buf);
    ASSERT_OK(dbtest->Put(keystr, keystr));
9275
    dbtest->Flush();
T
Tyler Harter 已提交
9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286
  }

  // GROUP 2
  for (int i = 1; i <= big_range_sstfiles; i++) {
    snprintf(buf, sizeof(buf), "%02d______:start", 0);
    keystr = std::string(buf);
    ASSERT_OK(dbtest->Put(keystr, keystr));
    snprintf(buf, sizeof(buf), "%02d______:end",
             small_range_sstfiles+i+1);
    keystr = std::string(buf);
    ASSERT_OK(dbtest->Put(keystr, keystr));
9287
    dbtest->Flush();
T
Tyler Harter 已提交
9288 9289
  }
}
I
Igor Canadi 已提交
9290
}  // namespace
T
Tyler Harter 已提交
9291

I
Igor Sugak 已提交
9292
TEST_F(DBTest, PrefixScan) {
V
Venkatesh Radhakrishnan 已提交
9293 9294
  XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip,
             kSkipNoPrefix);
9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312
  while (ChangeFilterOptions()) {
    int count;
    Slice prefix;
    Slice key;
    char buf[100];
    Iterator* iter;
    snprintf(buf, sizeof(buf), "03______:");
    prefix = Slice(buf, 8);
    key = Slice(buf, 9);
    // db configs
    env_->count_random_reads_ = true;
    Options options = CurrentOptions();
    options.env = env_;
    options.prefix_extractor.reset(NewFixedPrefixTransform(8));
    options.disable_auto_compactions = true;
    options.max_background_compactions = 2;
    options.create_if_missing = true;
    options.memtable_factory.reset(NewHashSkipListRepFactory(16));
T
Tyler Harter 已提交
9313

9314 9315 9316 9317 9318
    BlockBasedTableOptions table_options;
    table_options.no_block_cache = true;
    table_options.filter_policy.reset(NewBloomFilterPolicy(10));
    table_options.whole_key_filtering = false;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
9319

9320
    // 11 RAND I/Os
L
Lei Jin 已提交
9321
    DestroyAndReopen(options);
9322 9323 9324 9325 9326 9327 9328 9329 9330
    PrefixScanInit(this);
    count = 0;
    env_->random_read_counter_.Reset();
    iter = db_->NewIterator(ReadOptions());
    for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
      if (! iter->key().starts_with(prefix)) {
        break;
      }
      count++;
T
Tyler Harter 已提交
9331
    }
9332 9333 9334 9335 9336 9337
    ASSERT_OK(iter->status());
    delete iter;
    ASSERT_EQ(count, 2);
    ASSERT_EQ(env_->random_read_counter_.Read(), 2);
    Close();
  }  // end of while
V
Venkatesh Radhakrishnan 已提交
9338
  XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip, 0);
T
Tyler Harter 已提交
9339 9340
}

I
Igor Sugak 已提交
9341
TEST_F(DBTest, TailingIteratorSingle) {
T
Tomislav Novak 已提交
9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358
  ReadOptions read_options;
  read_options.tailing = true;

  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  iter->SeekToFirst();
  ASSERT_TRUE(!iter->Valid());

  // add a record and check that iter can see it
  ASSERT_OK(db_->Put(WriteOptions(), "mirko", "fodor"));
  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), "mirko");

  iter->Next();
  ASSERT_TRUE(!iter->Valid());
}

I
Igor Sugak 已提交
9359
TEST_F(DBTest, TailingIteratorKeepAdding) {
L
Lei Jin 已提交
9360
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
T
Tomislav Novak 已提交
9361 9362 9363
  ReadOptions read_options;
  read_options.tailing = true;

9364
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options, handles_[1]));
T
Tomislav Novak 已提交
9365 9366 9367 9368 9369 9370 9371 9372
  std::string value(1024, 'a');

  const int num_records = 10000;
  for (int i = 0; i < num_records; ++i) {
    char buf[32];
    snprintf(buf, sizeof(buf), "%016d", i);

    Slice key(buf, 16);
9373
    ASSERT_OK(Put(1, key, value));
T
Tomislav Novak 已提交
9374 9375 9376 9377 9378 9379 9380

    iter->Seek(key);
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(key), 0);
  }
}

I
Igor Sugak 已提交
9381
TEST_F(DBTest, TailingIteratorSeekToNext) {
L
Lei Jin 已提交
9382
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
L
Lei Jin 已提交
9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427
  ReadOptions read_options;
  read_options.tailing = true;

  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options, handles_[1]));
  std::string value(1024, 'a');

  const int num_records = 1000;
  for (int i = 1; i < num_records; ++i) {
    char buf1[32];
    char buf2[32];
    snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5);

    Slice key(buf1, 20);
    ASSERT_OK(Put(1, key, value));

    if (i % 100 == 99) {
      ASSERT_OK(Flush(1));
    }

    snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2);
    Slice target(buf2, 20);
    iter->Seek(target);
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(key), 0);
  }
  for (int i = 2 * num_records; i > 0; --i) {
    char buf1[32];
    char buf2[32];
    snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5);

    Slice key(buf1, 20);
    ASSERT_OK(Put(1, key, value));

    if (i % 100 == 99) {
      ASSERT_OK(Flush(1));
    }

    snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2);
    Slice target(buf2, 20);
    iter->Seek(target);
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(key), 0);
  }
}

I
Igor Sugak 已提交
9428
TEST_F(DBTest, TailingIteratorDeletes) {
L
Lei Jin 已提交
9429
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
T
Tomislav Novak 已提交
9430 9431 9432
  ReadOptions read_options;
  read_options.tailing = true;

9433
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options, handles_[1]));
T
Tomislav Novak 已提交
9434 9435

  // write a single record, read it using the iterator, then delete it
9436
  ASSERT_OK(Put(1, "0test", "test"));
T
Tomislav Novak 已提交
9437 9438 9439
  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), "0test");
9440
  ASSERT_OK(Delete(1, "0test"));
T
Tomislav Novak 已提交
9441 9442 9443 9444 9445 9446 9447 9448 9449 9450

  // write many more records
  const int num_records = 10000;
  std::string value(1024, 'A');

  for (int i = 0; i < num_records; ++i) {
    char buf[32];
    snprintf(buf, sizeof(buf), "1%015d", i);

    Slice key(buf, 16);
9451
    ASSERT_OK(Put(1, key, value));
T
Tomislav Novak 已提交
9452 9453 9454
  }

  // force a flush to make sure that no records are read from memtable
9455
  ASSERT_OK(Flush(1));
T
Tomislav Novak 已提交
9456 9457 9458 9459 9460 9461 9462 9463 9464 9465 9466

  // skip "0test"
  iter->Next();

  // make sure we can read all new records using the existing iterator
  int count = 0;
  for (; iter->Valid(); iter->Next(), ++count) ;

  ASSERT_EQ(count, num_records);
}

I
Igor Sugak 已提交
9467
TEST_F(DBTest, TailingIteratorPrefixSeek) {
V
Venkatesh Radhakrishnan 已提交
9468 9469
  XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip,
             kSkipNoPrefix);
T
Tomislav Novak 已提交
9470 9471 9472 9473 9474 9475 9476
  ReadOptions read_options;
  read_options.tailing = true;

  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
9477
  options.prefix_extractor.reset(NewFixedPrefixTransform(2));
9478
  options.memtable_factory.reset(NewHashSkipListRepFactory(16));
L
Lei Jin 已提交
9479
  DestroyAndReopen(options);
L
Lei Jin 已提交
9480
  CreateAndReopenWithCF({"pikachu"}, options);
T
Tomislav Novak 已提交
9481

9482 9483
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options, handles_[1]));
  ASSERT_OK(Put(1, "0101", "test"));
T
Tomislav Novak 已提交
9484

9485
  ASSERT_OK(Flush(1));
T
Tomislav Novak 已提交
9486

9487
  ASSERT_OK(Put(1, "0202", "test"));
T
Tomislav Novak 已提交
9488 9489 9490 9491 9492 9493 9494 9495 9496 9497 9498

  // Seek(0102) shouldn't find any records since 0202 has a different prefix
  iter->Seek("0102");
  ASSERT_TRUE(!iter->Valid());

  iter->Seek("0202");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), "0202");

  iter->Next();
  ASSERT_TRUE(!iter->Valid());
V
Venkatesh Radhakrishnan 已提交
9499
  XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip, 0);
T
Tomislav Novak 已提交
9500 9501
}

I
Igor Sugak 已提交
9502
TEST_F(DBTest, TailingIteratorIncomplete) {
L
Lei Jin 已提交
9503
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
9504 9505 9506 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523
  ReadOptions read_options;
  read_options.tailing = true;
  read_options.read_tier = kBlockCacheTier;

  std::string key("key");
  std::string value("value");

  ASSERT_OK(db_->Put(WriteOptions(), key, value));

  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  iter->SeekToFirst();
  // we either see the entry or it's not in cache
  ASSERT_TRUE(iter->Valid() || iter->status().IsIncomplete());

  ASSERT_OK(db_->CompactRange(nullptr, nullptr));
  iter->SeekToFirst();
  // should still be true after compaction
  ASSERT_TRUE(iter->Valid() || iter->status().IsIncomplete());
}

I
Igor Sugak 已提交
9524
TEST_F(DBTest, TailingIteratorSeekToSame) {
T
Tomislav Novak 已提交
9525 9526 9527
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleUniversal;
  options.write_buffer_size = 1000;
L
Lei Jin 已提交
9528
  CreateAndReopenWithCF({"pikachu"}, options);
T
Tomislav Novak 已提交
9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551

  ReadOptions read_options;
  read_options.tailing = true;

  const int NROWS = 10000;
  // Write rows with keys 00000, 00002, 00004 etc.
  for (int i = 0; i < NROWS; ++i) {
    char buf[100];
    snprintf(buf, sizeof(buf), "%05d", 2*i);
    std::string key(buf);
    std::string value("value");
    ASSERT_OK(db_->Put(WriteOptions(), key, value));
  }

  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  // Seek to 00001.  We expect to find 00002.
  std::string start_key = "00001";
  iter->Seek(start_key);
  ASSERT_TRUE(iter->Valid());

  std::string found = iter->key().ToString();
  ASSERT_EQ("00002", found);

V
Venkatesh Radhakrishnan 已提交
9552 9553 9554 9555 9556 9557 9558
  // Now seek to the same key.  The iterator should remain in the same
  // position.
  iter->Seek(found);
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(found, iter->key().ToString());
}

I
Igor Sugak 已提交
9559
TEST_F(DBTest, ManagedTailingIteratorSingle) {
V
Venkatesh Radhakrishnan 已提交
9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577
  ReadOptions read_options;
  read_options.tailing = true;
  read_options.managed = true;

  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  iter->SeekToFirst();
  ASSERT_TRUE(!iter->Valid());

  // add a record and check that iter can see it
  ASSERT_OK(db_->Put(WriteOptions(), "mirko", "fodor"));
  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), "mirko");

  iter->Next();
  ASSERT_TRUE(!iter->Valid());
}

I
Igor Sugak 已提交
9578
TEST_F(DBTest, ManagedTailingIteratorKeepAdding) {
V
Venkatesh Radhakrishnan 已提交
9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  ReadOptions read_options;
  read_options.tailing = true;
  read_options.managed = true;

  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options, handles_[1]));
  std::string value(1024, 'a');

  const int num_records = 10000;
  for (int i = 0; i < num_records; ++i) {
    char buf[32];
    snprintf(buf, sizeof(buf), "%016d", i);

    Slice key(buf, 16);
    ASSERT_OK(Put(1, key, value));

    iter->Seek(key);
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(key), 0);
  }
}

I
Igor Sugak 已提交
9601
TEST_F(DBTest, ManagedTailingIteratorSeekToNext) {
V
Venkatesh Radhakrishnan 已提交
9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  ReadOptions read_options;
  read_options.tailing = true;
  read_options.managed = true;

  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options, handles_[1]));
  std::string value(1024, 'a');

  const int num_records = 1000;
  for (int i = 1; i < num_records; ++i) {
    char buf1[32];
    char buf2[32];
    snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5);

    Slice key(buf1, 20);
    ASSERT_OK(Put(1, key, value));

    if (i % 100 == 99) {
      ASSERT_OK(Flush(1));
    }

    snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2);
    Slice target(buf2, 20);
    iter->Seek(target);
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(key), 0);
  }
  for (int i = 2 * num_records; i > 0; --i) {
    char buf1[32];
    char buf2[32];
    snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5);

    Slice key(buf1, 20);
    ASSERT_OK(Put(1, key, value));

    if (i % 100 == 99) {
      ASSERT_OK(Flush(1));
    }

    snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2);
    Slice target(buf2, 20);
    iter->Seek(target);
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(key), 0);
  }
}

I
Igor Sugak 已提交
9649
TEST_F(DBTest, ManagedTailingIteratorDeletes) {
V
Venkatesh Radhakrishnan 已提交
9650 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  ReadOptions read_options;
  read_options.tailing = true;
  read_options.managed = true;

  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options, handles_[1]));

  // write a single record, read it using the iterator, then delete it
  ASSERT_OK(Put(1, "0test", "test"));
  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), "0test");
  ASSERT_OK(Delete(1, "0test"));

  // write many more records
  const int num_records = 10000;
  std::string value(1024, 'A');

  for (int i = 0; i < num_records; ++i) {
    char buf[32];
    snprintf(buf, sizeof(buf), "1%015d", i);

    Slice key(buf, 16);
    ASSERT_OK(Put(1, key, value));
  }

  // force a flush to make sure that no records are read from memtable
  ASSERT_OK(Flush(1));

  // skip "0test"
  iter->Next();

  // make sure we can read all new records using the existing iterator
  int count = 0;
  for (; iter->Valid(); iter->Next(), ++count) {
  }

  ASSERT_EQ(count, num_records);
}

I
Igor Sugak 已提交
9690
TEST_F(DBTest, ManagedTailingIteratorPrefixSeek) {
V
Venkatesh Radhakrishnan 已提交
9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725
  XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip,
             kSkipNoPrefix);
  ReadOptions read_options;
  read_options.tailing = true;
  read_options.managed = true;

  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
  options.prefix_extractor.reset(NewFixedPrefixTransform(2));
  options.memtable_factory.reset(NewHashSkipListRepFactory(16));
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options, handles_[1]));
  ASSERT_OK(Put(1, "0101", "test"));

  ASSERT_OK(Flush(1));

  ASSERT_OK(Put(1, "0202", "test"));

  // Seek(0102) shouldn't find any records since 0202 has a different prefix
  iter->Seek("0102");
  ASSERT_TRUE(!iter->Valid());

  iter->Seek("0202");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), "0202");

  iter->Next();
  ASSERT_TRUE(!iter->Valid());
  XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip, 0);
}

I
Igor Sugak 已提交
9726
TEST_F(DBTest, ManagedTailingIteratorIncomplete) {
V
Venkatesh Radhakrishnan 已提交
9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746 9747 9748
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  ReadOptions read_options;
  read_options.tailing = true;
  read_options.managed = true;
  read_options.read_tier = kBlockCacheTier;

  std::string key = "key";
  std::string value = "value";

  ASSERT_OK(db_->Put(WriteOptions(), key, value));

  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  iter->SeekToFirst();
  // we either see the entry or it's not in cache
  ASSERT_TRUE(iter->Valid() || iter->status().IsIncomplete());

  ASSERT_OK(db_->CompactRange(nullptr, nullptr));
  iter->SeekToFirst();
  // should still be true after compaction
  ASSERT_TRUE(iter->Valid() || iter->status().IsIncomplete());
}

I
Igor Sugak 已提交
9749
TEST_F(DBTest, ManagedTailingIteratorSeekToSame) {
V
Venkatesh Radhakrishnan 已提交
9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768 9769 9770 9771 9772 9773 9774 9775 9776 9777
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleUniversal;
  options.write_buffer_size = 1000;
  CreateAndReopenWithCF({"pikachu"}, options);

  ReadOptions read_options;
  read_options.tailing = true;
  read_options.managed = true;

  const int NROWS = 10000;
  // Write rows with keys 00000, 00002, 00004 etc.
  for (int i = 0; i < NROWS; ++i) {
    char buf[100];
    snprintf(buf, sizeof(buf), "%05d", 2 * i);
    std::string key(buf);
    std::string value("value");
    ASSERT_OK(db_->Put(WriteOptions(), key, value));
  }

  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  // Seek to 00001.  We expect to find 00002.
  std::string start_key = "00001";
  iter->Seek(start_key);
  ASSERT_TRUE(iter->Valid());

  std::string found = iter->key().ToString();
  ASSERT_EQ("00002", found);

T
Tomislav Novak 已提交
9778 9779 9780 9781 9782 9783 9784
  // Now seek to the same key.  The iterator should remain in the same
  // position.
  iter->Seek(found);
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(found, iter->key().ToString());
}

I
Igor Sugak 已提交
9785
TEST_F(DBTest, BlockBasedTablePrefixIndexTest) {
9786 9787 9788 9789 9790 9791 9792 9793
  // create a DB with block prefix index
  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();
  table_options.index_type = BlockBasedTableOptions::kHashSearch;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));


L
Lei Jin 已提交
9794
  Reopen(options);
9795 9796 9797 9798 9799 9800 9801 9802 9803 9804
  ASSERT_OK(Put("k1", "v1"));
  Flush();
  ASSERT_OK(Put("k2", "v2"));

  // Reopen it without prefix extractor, make sure everything still works.
  // RocksDB should just fall back to the binary index.
  table_options.index_type = BlockBasedTableOptions::kBinarySearch;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.prefix_extractor.reset();

L
Lei Jin 已提交
9805
  Reopen(options);
9806 9807 9808 9809
  ASSERT_EQ("v1", Get("k1"));
  ASSERT_EQ("v2", Get("k2"));
}

I
Igor Sugak 已提交
9810
TEST_F(DBTest, ChecksumTest) {
I
xxHash  
Igor Canadi 已提交
9811 9812 9813 9814 9815
  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();

  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
L
Lei Jin 已提交
9816
  Reopen(options);
I
xxHash  
Igor Canadi 已提交
9817 9818 9819 9820 9821 9822
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  ASSERT_OK(Flush());  // table with crc checksum

  table_options.checksum = kxxHash;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
L
Lei Jin 已提交
9823
  Reopen(options);
I
xxHash  
Igor Canadi 已提交
9824 9825 9826 9827 9828 9829
  ASSERT_OK(Put("e", "f"));
  ASSERT_OK(Put("g", "h"));
  ASSERT_OK(Flush());  // table with xxhash checksum

  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
L
Lei Jin 已提交
9830
  Reopen(options);
I
xxHash  
Igor Canadi 已提交
9831 9832 9833 9834
  ASSERT_EQ("b", Get("a"));
  ASSERT_EQ("d", Get("c"));
  ASSERT_EQ("f", Get("e"));
  ASSERT_EQ("h", Get("g"));
T
Tomislav Novak 已提交
9835

I
xxHash  
Igor Canadi 已提交
9836 9837
  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
L
Lei Jin 已提交
9838
  Reopen(options);
I
xxHash  
Igor Canadi 已提交
9839 9840 9841 9842 9843
  ASSERT_EQ("b", Get("a"));
  ASSERT_EQ("d", Get("c"));
  ASSERT_EQ("f", Get("e"));
  ASSERT_EQ("h", Get("g"));
}
I
Igor Canadi 已提交
9844

I
Igor Sugak 已提交
9845
TEST_F(DBTest, FIFOCompactionTest) {
I
Igor Canadi 已提交
9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857
  for (int iter = 0; iter < 2; ++iter) {
    // first iteration -- auto compaction
    // second iteration -- manual compaction
    Options options;
    options.compaction_style = kCompactionStyleFIFO;
    options.write_buffer_size = 100 << 10;                             // 100KB
    options.compaction_options_fifo.max_table_files_size = 500 << 10;  // 500KB
    options.compression = kNoCompression;
    options.create_if_missing = true;
    if (iter == 1) {
      options.disable_auto_compactions = true;
    }
9858
    options = CurrentOptions(options);
L
Lei Jin 已提交
9859
    DestroyAndReopen(options);
I
Igor Canadi 已提交
9860 9861 9862 9863

    Random rnd(301);
    for (int i = 0; i < 6; ++i) {
      for (int j = 0; j < 100; ++j) {
9864
        ASSERT_OK(Put(ToString(i * 100 + j), RandomString(&rnd, 1024)));
I
Igor Canadi 已提交
9865 9866
      }
      // flush should happen here
9867
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
I
Igor Canadi 已提交
9868 9869 9870 9871 9872 9873 9874 9875 9876 9877
    }
    if (iter == 0) {
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    } else {
      ASSERT_OK(db_->CompactRange(nullptr, nullptr));
    }
    // only 5 files should survive
    ASSERT_EQ(NumTableFilesAtLevel(0), 5);
    for (int i = 0; i < 50; ++i) {
      // these keys should be deleted in previous compaction
9878
      ASSERT_EQ("NOT_FOUND", Get(ToString(i)));
I
Igor Canadi 已提交
9879 9880 9881
    }
  }
}
9882

I
Igor Sugak 已提交
9883
TEST_F(DBTest, SimpleWriteTimeoutTest) {
I
Igor Canadi 已提交
9884 9885 9886 9887 9888 9889 9890 9891
  // Block compaction thread, which will also block the flushes because
  // max_background_flushes == 0, so flushes are getting executed by the
  // compaction thread
  env_->SetBackgroundThreads(1, Env::LOW);
  SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

9892 9893 9894 9895 9896 9897 9898
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.write_buffer_size = 100000;
  options.max_background_flushes = 0;
  options.max_write_buffer_number = 2;
  options.max_total_wal_size = std::numeric_limits<uint64_t>::max();
L
Lei Jin 已提交
9899
  WriteOptions write_opt;
9900
  write_opt.timeout_hint_us = 0;
L
Lei Jin 已提交
9901
  DestroyAndReopen(options);
I
Igor Canadi 已提交
9902
  // fill the two write buffers
9903 9904 9905 9906
  ASSERT_OK(Put(Key(1), Key(1) + std::string(100000, 'v'), write_opt));
  ASSERT_OK(Put(Key(2), Key(2) + std::string(100000, 'v'), write_opt));
  // As the only two write buffers are full in this moment, the third
  // Put is expected to be timed-out.
9907
  write_opt.timeout_hint_us = 50;
9908 9909
  ASSERT_TRUE(
      Put(Key(3), Key(3) + std::string(100000, 'v'), write_opt).IsTimedOut());
I
Igor Canadi 已提交
9910 9911 9912

  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926 9927 9928 9929 9930 9931 9932 9933 9934
}

// Multi-threaded Timeout Test
namespace {

static const int kValueSize = 1000;
static const int kWriteBufferSize = 100000;

struct TimeoutWriterState {
  int id;
  DB* db;
  std::atomic<bool> done;
  std::map<int, std::string> success_kvs;
};

static void RandomTimeoutWriter(void* arg) {
  TimeoutWriterState* state = reinterpret_cast<TimeoutWriterState*>(arg);
  static const uint64_t kTimerBias = 50;
  int thread_id = state->id;
  DB* db = state->db;

  Random rnd(1000 + thread_id);
L
Lei Jin 已提交
9935
  WriteOptions write_opt;
9936 9937 9938 9939 9940 9941 9942 9943 9944 9945 9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962 9963 9964 9965 9966 9967 9968 9969 9970 9971 9972 9973 9974 9975 9976
  write_opt.timeout_hint_us = 500;
  int timeout_count = 0;
  int num_keys = kNumKeys * 5;

  for (int k = 0; k < num_keys; ++k) {
    int key = k + thread_id * num_keys;
    std::string value = RandomString(&rnd, kValueSize);
    // only the second-half is randomized
    if (k > num_keys / 2) {
      switch (rnd.Next() % 5) {
        case 0:
          write_opt.timeout_hint_us = 500 * thread_id;
          break;
        case 1:
          write_opt.timeout_hint_us = num_keys - k;
          break;
        case 2:
          write_opt.timeout_hint_us = 1;
          break;
        default:
          write_opt.timeout_hint_us = 0;
          state->success_kvs.insert({key, value});
      }
    }

    uint64_t time_before_put = db->GetEnv()->NowMicros();
    Status s = db->Put(write_opt, Key(key), value);
    uint64_t put_duration = db->GetEnv()->NowMicros() - time_before_put;
    if (write_opt.timeout_hint_us == 0 ||
        put_duration + kTimerBias < write_opt.timeout_hint_us) {
      ASSERT_OK(s);
    }
    if (s.IsTimedOut()) {
      timeout_count++;
      ASSERT_GT(put_duration + kTimerBias, write_opt.timeout_hint_us);
    }
  }

  state->done = true;
}

I
Igor Sugak 已提交
9977
TEST_F(DBTest, MTRandomTimeoutTest) {
9978 9979 9980 9981 9982 9983 9984 9985
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.max_write_buffer_number = 2;
  options.compression = kNoCompression;
  options.level0_slowdown_writes_trigger = 10;
  options.level0_stop_writes_trigger = 20;
  options.write_buffer_size = kWriteBufferSize;
L
Lei Jin 已提交
9986
  DestroyAndReopen(options);
9987 9988 9989 9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011

  TimeoutWriterState thread_states[kNumThreads];
  for (int tid = 0; tid < kNumThreads; ++tid) {
    thread_states[tid].id = tid;
    thread_states[tid].db = db_;
    thread_states[tid].done = false;
    env_->StartThread(RandomTimeoutWriter, &thread_states[tid]);
  }

  for (int tid = 0; tid < kNumThreads; ++tid) {
    while (thread_states[tid].done == false) {
      env_->SleepForMicroseconds(100000);
    }
  }

  Flush();

  for (int tid = 0; tid < kNumThreads; ++tid) {
    auto& success_kvs = thread_states[tid].success_kvs;
    for (auto it = success_kvs.begin(); it != success_kvs.end(); ++it) {
      ASSERT_EQ(Get(Key(it->first)), it->second);
    }
  }
}

I
Igor Sugak 已提交
10012
TEST_F(DBTest, Level0StopWritesTest) {
10013 10014 10015
  Options options = CurrentOptions();
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 4;
10016
  options.disable_auto_compactions = true;
10017
  options.max_mem_compaction_level = 0;
L
Lei Jin 已提交
10018
  Reopen(options);
10019 10020 10021 10022 10023 10024 10025 10026 10027 10028 10029 10030 10031

  // create 4 level0 tables
  for (int i = 0; i < 4; ++i) {
    Put("a", "b");
    Flush();
  }

  WriteOptions woptions;
  woptions.timeout_hint_us = 30 * 1000;  // 30 ms
  Status s = Put("a", "b", woptions);
  ASSERT_TRUE(s.IsTimedOut());
}

10032 10033
}  // anonymous namespace

L
Lei Jin 已提交
10034 10035
/*
 * This test is not reliable enough as it heavily depends on disk behavior.
L
Lei Jin 已提交
10036
 */
I
Igor Sugak 已提交
10037
TEST_F(DBTest, RateLimitingTest) {
L
Lei Jin 已提交
10038
  Options options = CurrentOptions();
L
Lei Jin 已提交
10039 10040 10041 10042 10043
  options.write_buffer_size = 1 << 20;         // 1MB
  options.level0_file_num_compaction_trigger = 2;
  options.target_file_size_base = 1 << 20;     // 1MB
  options.max_bytes_for_level_base = 4 << 20;  // 4MB
  options.max_bytes_for_level_multiplier = 4;
L
Lei Jin 已提交
10044 10045 10046
  options.compression = kNoCompression;
  options.create_if_missing = true;
  options.env = env_;
L
Lei Jin 已提交
10047
  options.IncreaseParallelism(4);
L
Lei Jin 已提交
10048
  DestroyAndReopen(options);
L
Lei Jin 已提交
10049

L
Lei Jin 已提交
10050 10051 10052
  WriteOptions wo;
  wo.disableWAL = true;

L
Lei Jin 已提交
10053 10054 10055
  // # no rate limiting
  Random rnd(301);
  uint64_t start = env_->NowMicros();
L
Lei Jin 已提交
10056 10057 10058 10059
  // Write ~96M data
  for (int64_t i = 0; i < (96 << 10); ++i) {
    ASSERT_OK(Put(RandomString(&rnd, 32),
                  RandomString(&rnd, (1 << 10) + 1), wo));
L
Lei Jin 已提交
10060 10061 10062 10063 10064 10065 10066
  }
  uint64_t elapsed = env_->NowMicros() - start;
  double raw_rate = env_->bytes_written_ * 1000000 / elapsed;
  Close();

  // # rate limiting with 0.7 x threshold
  options.rate_limiter.reset(
L
Lei Jin 已提交
10067
    NewGenericRateLimiter(static_cast<int64_t>(0.7 * raw_rate)));
L
Lei Jin 已提交
10068
  env_->bytes_written_ = 0;
L
Lei Jin 已提交
10069
  DestroyAndReopen(options);
L
Lei Jin 已提交
10070 10071

  start = env_->NowMicros();
L
Lei Jin 已提交
10072 10073 10074 10075
  // Write ~96M data
  for (int64_t i = 0; i < (96 << 10); ++i) {
    ASSERT_OK(Put(RandomString(&rnd, 32),
                  RandomString(&rnd, (1 << 10) + 1), wo));
L
Lei Jin 已提交
10076 10077 10078 10079 10080 10081 10082
  }
  elapsed = env_->NowMicros() - start;
  Close();
  ASSERT_TRUE(options.rate_limiter->GetTotalBytesThrough() ==
              env_->bytes_written_);
  double ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
  fprintf(stderr, "write rate ratio = %.2lf, expected 0.7\n", ratio);
L
Lei Jin 已提交
10083
  ASSERT_TRUE(ratio < 0.8);
L
Lei Jin 已提交
10084 10085 10086

  // # rate limiting with half of the raw_rate
  options.rate_limiter.reset(
L
Lei Jin 已提交
10087
    NewGenericRateLimiter(static_cast<int64_t>(raw_rate / 2)));
L
Lei Jin 已提交
10088
  env_->bytes_written_ = 0;
L
Lei Jin 已提交
10089
  DestroyAndReopen(options);
L
Lei Jin 已提交
10090 10091

  start = env_->NowMicros();
L
Lei Jin 已提交
10092 10093 10094 10095
  // Write ~96M data
  for (int64_t i = 0; i < (96 << 10); ++i) {
    ASSERT_OK(Put(RandomString(&rnd, 32),
                  RandomString(&rnd, (1 << 10) + 1), wo));
L
Lei Jin 已提交
10096 10097 10098 10099 10100 10101 10102
  }
  elapsed = env_->NowMicros() - start;
  Close();
  ASSERT_TRUE(options.rate_limiter->GetTotalBytesThrough() ==
              env_->bytes_written_);
  ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
  fprintf(stderr, "write rate ratio = %.2lf, expected 0.5\n", ratio);
L
Lei Jin 已提交
10103
  ASSERT_TRUE(ratio < 0.6);
L
Lei Jin 已提交
10104 10105
}

10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155 10156 10157
namespace {
  bool HaveOverlappingKeyRanges(
      const Comparator* c,
      const SstFileMetaData& a, const SstFileMetaData& b) {
    if (c->Compare(a.smallestkey, b.smallestkey) >= 0) {
      if (c->Compare(a.smallestkey, b.largestkey) <= 0) {
        // b.smallestkey <= a.smallestkey <= b.largestkey
        return true;
      }
    } else if (c->Compare(a.largestkey, b.smallestkey) >= 0) {
      // a.smallestkey < b.smallestkey <= a.largestkey
      return true;
    }
    if (c->Compare(a.largestkey, b.largestkey) <= 0) {
      if (c->Compare(a.largestkey, b.smallestkey) >= 0) {
        // b.smallestkey <= a.largestkey <= b.largestkey
        return true;
      }
    } else if (c->Compare(a.smallestkey, b.largestkey) <= 0) {
      // a.smallestkey <= b.largestkey < a.largestkey
      return true;
    }
    return false;
  }

  // Identifies all files between level "min_level" and "max_level"
  // which has overlapping key range with "input_file_meta".
  void GetOverlappingFileNumbersForLevelCompaction(
      const ColumnFamilyMetaData& cf_meta,
      const Comparator* comparator,
      int min_level, int max_level,
      const SstFileMetaData* input_file_meta,
      std::set<std::string>* overlapping_file_names) {
    std::set<const SstFileMetaData*> overlapping_files;
    overlapping_files.insert(input_file_meta);
    for (int m = min_level; m <= max_level; ++m) {
      for (auto& file : cf_meta.levels[m].files) {
        for (auto* included_file : overlapping_files) {
          if (HaveOverlappingKeyRanges(
                  comparator, *included_file, file)) {
            overlapping_files.insert(&file);
            overlapping_file_names->insert(file.name);
            break;
          }
        }
      }
    }
  }

  void VerifyCompactionResult(
      const ColumnFamilyMetaData& cf_meta,
      const std::set<std::string>& overlapping_file_numbers) {
C
Chris BeHanna 已提交
10158
#ifndef NDEBUG
10159 10160 10161 10162 10163 10164
    for (auto& level : cf_meta.levels) {
      for (auto& file : level.files) {
        assert(overlapping_file_numbers.find(file.name) ==
               overlapping_file_numbers.end());
      }
    }
C
Chris BeHanna 已提交
10165
#endif
10166 10167 10168 10169 10170 10171 10172 10173 10174 10175 10176 10177 10178 10179 10180 10181 10182 10183 10184 10185 10186 10187 10188
  }

  const SstFileMetaData* PickFileRandomly(
      const ColumnFamilyMetaData& cf_meta,
      Random* rand,
      int* level = nullptr) {
    auto file_id = rand->Uniform(static_cast<int>(
        cf_meta.file_count)) + 1;
    for (auto& level_meta : cf_meta.levels) {
      if (file_id <= level_meta.files.size()) {
        if (level != nullptr) {
          *level = level_meta.level;
        }
        auto result = rand->Uniform(file_id);
        return &(level_meta.files[result]);
      }
      file_id -= level_meta.files.size();
    }
    assert(false);
    return nullptr;
  }
}  // namespace

10189 10190
// TODO t6534343 -- Don't run two level 0 CompactFiles concurrently
TEST_F(DBTest, DISABLED_CompactFilesOnLevelCompaction) {
10191 10192 10193
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
10194 10195 10196 10197 10198 10199 10200 10201 10202 10203 10204 10205 10206 10207 10208
  const int kEntriesPerBuffer = 100;
  Options options;
  options.create_if_missing = true;
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.compaction_style = kCompactionStyleLevel;
  options.target_file_size_base = options.write_buffer_size;
  options.max_bytes_for_level_base = options.target_file_size_base * 2;
  options.level0_stop_writes_trigger = 2;
  options.max_bytes_for_level_multiplier = 2;
  options.compression = kNoCompression;
  options = CurrentOptions(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  Random rnd(301);
  for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) {
10209
    ASSERT_OK(Put(1, ToString(key), RandomString(&rnd, kTestValueSize)));
10210 10211 10212 10213 10214 10215
  }
  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
  dbfull()->TEST_WaitForCompact();

  ColumnFamilyMetaData cf_meta;
  dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta);
10216
  int output_level = static_cast<int>(cf_meta.levels.size()) - 1;
10217 10218 10219 10220 10221 10222 10223 10224 10225 10226 10227 10228 10229 10230 10231 10232 10233 10234 10235 10236 10237 10238 10239 10240
  for (int file_picked = 5; file_picked > 0; --file_picked) {
    std::set<std::string> overlapping_file_names;
    std::vector<std::string> compaction_input_file_names;
    for (int f = 0; f < file_picked; ++f) {
      int level;
      auto file_meta = PickFileRandomly(cf_meta, &rnd, &level);
      compaction_input_file_names.push_back(file_meta->name);
      GetOverlappingFileNumbersForLevelCompaction(
          cf_meta, options.comparator, level, output_level,
          file_meta, &overlapping_file_names);
    }

    ASSERT_OK(dbfull()->CompactFiles(
        CompactionOptions(), handles_[1],
        compaction_input_file_names,
        output_level));

    // Make sure all overlapping files do not exist after compaction
    dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta);
    VerifyCompactionResult(cf_meta, overlapping_file_names);
  }

  // make sure all key-values are still there.
  for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) {
10241
    ASSERT_NE(Get(1, ToString(key)), "NOT_FOUND");
10242 10243 10244
  }
}

I
Igor Sugak 已提交
10245
TEST_F(DBTest, CompactFilesOnUniversalCompaction) {
10246 10247 10248
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
10249 10250 10251 10252 10253 10254 10255
  const int kEntriesPerBuffer = 10;

  ChangeCompactOptions();
  Options options;
  options.create_if_missing = true;
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.compaction_style = kCompactionStyleLevel;
S
sdong 已提交
10256
  options.num_levels = 1;
10257 10258 10259 10260 10261 10262 10263
  options.target_file_size_base = options.write_buffer_size;
  options.compression = kNoCompression;
  options = CurrentOptions(options);
  CreateAndReopenWithCF({"pikachu"}, options);
  ASSERT_EQ(options.compaction_style, kCompactionStyleUniversal);
  Random rnd(301);
  for (int key = 1024 * kEntriesPerBuffer; key >= 0; --key) {
10264
    ASSERT_OK(Put(1, ToString(key), RandomString(&rnd, kTestValueSize)));
10265 10266 10267 10268 10269 10270 10271 10272 10273 10274 10275 10276 10277 10278 10279 10280 10281 10282 10283 10284 10285 10286 10287 10288 10289 10290 10291 10292 10293 10294 10295 10296 10297 10298 10299 10300 10301 10302 10303 10304 10305 10306 10307 10308 10309 10310 10311 10312 10313 10314
  }
  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
  dbfull()->TEST_WaitForCompact();
  ColumnFamilyMetaData cf_meta;
  dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta);
  std::vector<std::string> compaction_input_file_names;
  for (auto file : cf_meta.levels[0].files) {
    if (rnd.OneIn(2)) {
      compaction_input_file_names.push_back(file.name);
    }
  }

  if (compaction_input_file_names.size() == 0) {
    compaction_input_file_names.push_back(
        cf_meta.levels[0].files[0].name);
  }

  // expect fail since universal compaction only allow L0 output
  ASSERT_TRUE(!dbfull()->CompactFiles(
      CompactionOptions(), handles_[1],
      compaction_input_file_names, 1).ok());

  // expect ok and verify the compacted files no longer exist.
  ASSERT_OK(dbfull()->CompactFiles(
      CompactionOptions(), handles_[1],
      compaction_input_file_names, 0));

  dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta);
  VerifyCompactionResult(
      cf_meta,
      std::set<std::string>(compaction_input_file_names.begin(),
          compaction_input_file_names.end()));

  compaction_input_file_names.clear();

  // Pick the first and the last file, expect everything is
  // compacted into one single file.
  compaction_input_file_names.push_back(
      cf_meta.levels[0].files[0].name);
  compaction_input_file_names.push_back(
      cf_meta.levels[0].files[
          cf_meta.levels[0].files.size() - 1].name);
  ASSERT_OK(dbfull()->CompactFiles(
      CompactionOptions(), handles_[1],
      compaction_input_file_names, 0));

  dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta);
  ASSERT_EQ(cf_meta.levels[0].files.size(), 1U);
}

I
Igor Sugak 已提交
10315
TEST_F(DBTest, TableOptionsSanitizeTest) {
10316 10317
  Options options = CurrentOptions();
  options.create_if_missing = true;
L
Lei Jin 已提交
10318
  DestroyAndReopen(options);
10319 10320 10321 10322
  ASSERT_EQ(db_->GetOptions().allow_mmap_reads, false);

  options.table_factory.reset(new PlainTableFactory());
  options.prefix_extractor.reset(NewNoopTransform());
L
Lei Jin 已提交
10323
  Destroy(options);
L
Lei Jin 已提交
10324
  ASSERT_TRUE(TryReopen(options).IsNotSupported());
10325 10326 10327 10328 10329

  // Test for check of prefix_extractor when hash index is used for
  // block-based table
  BlockBasedTableOptions to;
  to.index_type = BlockBasedTableOptions::kHashSearch;
10330
  options = CurrentOptions();
10331 10332
  options.create_if_missing = true;
  options.table_factory.reset(NewBlockBasedTableFactory(to));
L
Lei Jin 已提交
10333
  ASSERT_TRUE(TryReopen(options).IsInvalidArgument());
10334
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
L
Lei Jin 已提交
10335
  ASSERT_OK(TryReopen(options));
10336 10337
}

I
Igor Sugak 已提交
10338
TEST_F(DBTest, SanitizeNumThreads) {
10339 10340 10341 10342 10343 10344 10345 10346 10347 10348 10349 10350 10351 10352 10353 10354 10355 10356 10357 10358 10359 10360 10361 10362 10363 10364 10365 10366 10367 10368 10369 10370 10371 10372 10373 10374 10375 10376
  for (int attempt = 0; attempt < 2; attempt++) {
    const size_t kTotalTasks = 8;
    SleepingBackgroundTask sleeping_tasks[kTotalTasks];

    Options options = CurrentOptions();
    if (attempt == 0) {
      options.max_background_compactions = 3;
      options.max_background_flushes = 2;
    }
    options.create_if_missing = true;
    DestroyAndReopen(options);

    for (size_t i = 0; i < kTotalTasks; i++) {
      // Insert 5 tasks to low priority queue and 5 tasks to high priority queue
      env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_tasks[i],
                     (i < 4) ? Env::Priority::LOW : Env::Priority::HIGH);
    }

    // Wait 100 milliseconds for they are scheduled.
    env_->SleepForMicroseconds(100000);

    // pool size 3, total task 4. Queue size should be 1.
    ASSERT_EQ(1U, options.env->GetThreadPoolQueueLen(Env::Priority::LOW));
    // pool size 2, total task 4. Queue size should be 2.
    ASSERT_EQ(2U, options.env->GetThreadPoolQueueLen(Env::Priority::HIGH));

    for (size_t i = 0; i < kTotalTasks; i++) {
      sleeping_tasks[i].WakeUp();
      sleeping_tasks[i].WaitUntilDone();
    }

    ASSERT_OK(Put("abc", "def"));
    ASSERT_EQ("def", Get("abc"));
    Flush();
    ASSERT_EQ("def", Get("abc"));
  }
}

I
Igor Sugak 已提交
10377
TEST_F(DBTest, DBIteratorBoundTest) {
10378
  Options options = CurrentOptions();
10379 10380 10381 10382
  options.env = env_;
  options.create_if_missing = true;

  options.prefix_extractor = nullptr;
L
Lei Jin 已提交
10383
  DestroyAndReopen(options);
10384 10385 10386 10387 10388 10389 10390 10391 10392 10393 10394 10395 10396 10397 10398 10399 10400 10401 10402 10403 10404 10405 10406 10407 10408 10409 10410 10411 10412 10413 10414
  ASSERT_OK(Put("a", "0"));
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("foo1", "bar1"));
  ASSERT_OK(Put("g1", "0"));

  // testing basic case with no iterate_upper_bound and no prefix_extractor
  {
    ReadOptions ro;
    ro.iterate_upper_bound = nullptr;

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->Seek("foo");

    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("foo")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
  }

  // testing iterate_upper_bound and forward iterator
  // to make sure it stops at bound
  {
    ReadOptions ro;
    // iterate_upper_bound points beyond the last expected entry
10415 10416
    Slice prefix("foo2");
    ro.iterate_upper_bound = &prefix;
10417 10418 10419 10420 10421 10422 10423 10424 10425 10426 10427 10428 10429 10430 10431 10432 10433 10434 10435 10436

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->Seek("foo");

    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("foo")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(("foo1")), 0);

    iter->Next();
    // should stop here...
    ASSERT_TRUE(!iter->Valid());
  }

  // prefix is the first letter of the key
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));

L
Lei Jin 已提交
10437
  DestroyAndReopen(options);
10438 10439 10440 10441 10442 10443 10444 10445 10446 10447
  ASSERT_OK(Put("a", "0"));
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("foo1", "bar1"));
  ASSERT_OK(Put("g1", "0"));

  // testing with iterate_upper_bound and prefix_extractor
  // Seek target and iterate_upper_bound are not is same prefix
  // This should be an error
  {
    ReadOptions ro;
10448 10449
    Slice prefix("g1");
    ro.iterate_upper_bound = &prefix;
10450 10451 10452 10453 10454 10455 10456 10457 10458 10459 10460 10461 10462

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->Seek("foo");

    ASSERT_TRUE(!iter->Valid());
    ASSERT_TRUE(iter->status().IsInvalidArgument());
  }

  // testing that iterate_upper_bound prevents iterating over deleted items
  // if the bound has already reached
  {
    options.prefix_extractor = nullptr;
L
Lei Jin 已提交
10463
    DestroyAndReopen(options);
10464 10465 10466 10467 10468 10469 10470 10471 10472 10473 10474 10475 10476 10477 10478 10479 10480 10481 10482 10483 10484 10485 10486 10487 10488 10489 10490 10491 10492 10493
    ASSERT_OK(Put("a", "0"));
    ASSERT_OK(Put("b", "0"));
    ASSERT_OK(Put("b1", "0"));
    ASSERT_OK(Put("c", "0"));
    ASSERT_OK(Put("d", "0"));
    ASSERT_OK(Put("e", "0"));
    ASSERT_OK(Delete("c"));
    ASSERT_OK(Delete("d"));

    // base case with no bound
    ReadOptions ro;
    ro.iterate_upper_bound = nullptr;

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->Seek("b");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("b")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(("b1")), 0);

    perf_context.Reset();
    iter->Next();

    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 2);

    // now testing with iterate_bound
10494 10495
    Slice prefix("c");
    ro.iterate_upper_bound = &prefix;
10496 10497 10498 10499 10500 10501 10502 10503 10504 10505 10506 10507 10508 10509 10510 10511 10512 10513 10514 10515 10516 10517

    iter.reset(db_->NewIterator(ro));

    perf_context.Reset();

    iter->Seek("b");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("b")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(("b1")), 0);

    iter->Next();
    // the iteration should stop as soon as the the bound key is reached
    // even though the key is deleted
    // hence internal_delete_skipped_count should be 0
    ASSERT_TRUE(!iter->Valid());
    ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 0);
  }
}

I
Igor Sugak 已提交
10518
TEST_F(DBTest, WriteSingleThreadEntry) {
10519 10520 10521 10522 10523 10524 10525 10526 10527 10528 10529 10530 10531 10532 10533 10534 10535
  std::vector<std::thread> threads;
  dbfull()->TEST_LockMutex();
  auto w = dbfull()->TEST_BeginWrite();
  threads.emplace_back([&] { Put("a", "b"); });
  env_->SleepForMicroseconds(10000);
  threads.emplace_back([&] { Flush(); });
  env_->SleepForMicroseconds(10000);
  dbfull()->TEST_UnlockMutex();
  dbfull()->TEST_LockMutex();
  dbfull()->TEST_EndWrite(w);
  dbfull()->TEST_UnlockMutex();

  for (auto& t : threads) {
    t.join();
  }
}

I
Igor Sugak 已提交
10536
TEST_F(DBTest, DisableDataSyncTest) {
S
sdong 已提交
10537
  env_->sync_counter_.store(0);
10538 10539 10540 10541 10542 10543 10544
  // iter 0 -- no sync
  // iter 1 -- sync
  for (int iter = 0; iter < 2; ++iter) {
    Options options = CurrentOptions();
    options.disableDataSync = iter == 0;
    options.create_if_missing = true;
    options.env = env_;
L
Lei Jin 已提交
10545
    Reopen(options);
L
Lei Jin 已提交
10546
    CreateAndReopenWithCF({"pikachu"}, options);
10547 10548 10549 10550 10551 10552 10553 10554 10555

    MakeTables(10, "a", "z");
    Compact("a", "z");

    if (iter == 0) {
      ASSERT_EQ(env_->sync_counter_.load(), 0);
    } else {
      ASSERT_GT(env_->sync_counter_.load(), 0);
    }
L
Lei Jin 已提交
10556
    Destroy(options);
10557 10558
  }
}
10559

I
Igor Sugak 已提交
10560
TEST_F(DBTest, DynamicMemtableOptions) {
L
Lei Jin 已提交
10561 10562 10563 10564 10565 10566 10567
  const uint64_t k64KB = 1 << 16;
  const uint64_t k128KB = 1 << 17;
  const uint64_t k5KB = 5 * 1024;
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.compression = kNoCompression;
10568
  options.max_background_compactions = 1;
L
Lei Jin 已提交
10569 10570 10571 10572 10573 10574 10575
  options.max_mem_compaction_level = 0;
  options.write_buffer_size = k64KB;
  options.max_write_buffer_number = 2;
  // Don't trigger compact/slowdown/stop
  options.level0_file_num_compaction_trigger = 1024;
  options.level0_slowdown_writes_trigger = 1024;
  options.level0_stop_writes_trigger = 1024;
L
Lei Jin 已提交
10576
  DestroyAndReopen(options);
L
Lei Jin 已提交
10577 10578 10579 10580

  auto gen_l0_kb = [this](int size) {
    Random rnd(301);
    for (int i = 0; i < size; i++) {
L
Lei Jin 已提交
10581
      ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
L
Lei Jin 已提交
10582 10583 10584 10585
    }
    dbfull()->TEST_WaitForFlushMemTable();
  };

L
Lei Jin 已提交
10586
  // Test write_buffer_size
L
Lei Jin 已提交
10587 10588
  gen_l0_kb(64);
  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
10589 10590
  ASSERT_LT(SizeAtLevel(0), k64KB + k5KB);
  ASSERT_GT(SizeAtLevel(0), k64KB - k5KB);
L
Lei Jin 已提交
10591 10592 10593 10594 10595 10596

  // Clean up L0
  dbfull()->CompactRange(nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  // Increase buffer size
10597
  ASSERT_OK(dbfull()->SetOptions({
L
Lei Jin 已提交
10598 10599 10600 10601 10602 10603 10604 10605
    {"write_buffer_size", "131072"},
  }));

  // The existing memtable is still 64KB in size, after it becomes immutable,
  // the next memtable will be 128KB in size. Write 256KB total, we should
  // have a 64KB L0 file, a 128KB L0 file, and a memtable with 64KB data
  gen_l0_kb(256);
  ASSERT_EQ(NumTableFilesAtLevel(0), 2);
10606 10607
  ASSERT_LT(SizeAtLevel(0), k128KB + k64KB + 2 * k5KB);
  ASSERT_GT(SizeAtLevel(0), k128KB + k64KB - 2 * k5KB);
L
Lei Jin 已提交
10608 10609 10610 10611 10612 10613

  // Test max_write_buffer_number
  // Block compaction thread, which will also block the flushes because
  // max_background_flushes == 0, so flushes are getting executed by the
  // compaction thread
  env_->SetBackgroundThreads(1, Env::LOW);
L
Lei Jin 已提交
10614 10615
  SleepingBackgroundTask sleeping_task_low1;
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_low1,
L
Lei Jin 已提交
10616 10617 10618 10619 10620
                 Env::Priority::LOW);
  // Start from scratch and disable compaction/flush. Flush can only happen
  // during compaction but trigger is pretty high
  options.max_background_flushes = 0;
  options.disable_auto_compactions = true;
L
Lei Jin 已提交
10621
  DestroyAndReopen(options);
L
Lei Jin 已提交
10622 10623 10624 10625 10626

  // Put until timeout, bounded by 256 puts. We should see timeout at ~128KB
  int count = 0;
  Random rnd(301);
  WriteOptions wo;
S
sdong 已提交
10627 10628 10629 10630 10631 10632
  wo.timeout_hint_us = 100000;  // Reasonabley long timeout to make sure sleep
                                // triggers but not forever.

  std::atomic<int> sleep_count(0);
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DelayWrite:TimedWait",
10633
      [&](void* arg) { sleep_count.fetch_add(1); });
S
sdong 已提交
10634
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
L
Lei Jin 已提交
10635 10636 10637 10638

  while (Put(Key(count), RandomString(&rnd, 1024), wo).ok() && count < 256) {
    count++;
  }
S
sdong 已提交
10639
  ASSERT_GT(sleep_count.load(), 0);
10640 10641
  ASSERT_GT(static_cast<double>(count), 128 * 0.8);
  ASSERT_LT(static_cast<double>(count), 128 * 1.2);
L
Lei Jin 已提交
10642

L
Lei Jin 已提交
10643 10644
  sleeping_task_low1.WakeUp();
  sleeping_task_low1.WaitUntilDone();
L
Lei Jin 已提交
10645 10646

  // Increase
10647
  ASSERT_OK(dbfull()->SetOptions({
L
Lei Jin 已提交
10648 10649 10650 10651 10652
    {"max_write_buffer_number", "8"},
  }));
  // Clean up memtable and L0
  dbfull()->CompactRange(nullptr, nullptr);

L
Lei Jin 已提交
10653 10654
  SleepingBackgroundTask sleeping_task_low2;
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_low2,
L
Lei Jin 已提交
10655 10656
                 Env::Priority::LOW);
  count = 0;
S
sdong 已提交
10657
  sleep_count.store(0);
L
Lei Jin 已提交
10658 10659 10660
  while (Put(Key(count), RandomString(&rnd, 1024), wo).ok() && count < 1024) {
    count++;
  }
S
sdong 已提交
10661
  ASSERT_GT(sleep_count.load(), 0);
10662 10663
  ASSERT_GT(static_cast<double>(count), 512 * 0.8);
  ASSERT_LT(static_cast<double>(count), 512 * 1.2);
L
Lei Jin 已提交
10664 10665
  sleeping_task_low2.WakeUp();
  sleeping_task_low2.WaitUntilDone();
L
Lei Jin 已提交
10666 10667

  // Decrease
10668
  ASSERT_OK(dbfull()->SetOptions({
L
Lei Jin 已提交
10669 10670 10671 10672 10673
    {"max_write_buffer_number", "4"},
  }));
  // Clean up memtable and L0
  dbfull()->CompactRange(nullptr, nullptr);

L
Lei Jin 已提交
10674 10675 10676
  SleepingBackgroundTask sleeping_task_low3;
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_low3,
                 Env::Priority::LOW);
S
sdong 已提交
10677

L
Lei Jin 已提交
10678
  count = 0;
S
sdong 已提交
10679
  sleep_count.store(0);
L
Lei Jin 已提交
10680 10681 10682
  while (Put(Key(count), RandomString(&rnd, 1024), wo).ok() && count < 1024) {
    count++;
  }
S
sdong 已提交
10683
  ASSERT_GT(sleep_count.load(), 0);
10684 10685
  ASSERT_GT(static_cast<double>(count), 256 * 0.8);
  ASSERT_LT(static_cast<double>(count), 266 * 1.2);
L
Lei Jin 已提交
10686 10687
  sleeping_task_low3.WakeUp();
  sleeping_task_low3.WaitUntilDone();
S
sdong 已提交
10688 10689

  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
L
Lei Jin 已提交
10690 10691
}

Y
Yueh-Hsuan Chiang 已提交
10692
#if ROCKSDB_USING_THREAD_STATUS
10693 10694 10695 10696 10697 10698 10699 10700 10701 10702 10703 10704 10705 10706 10707
namespace {
void VerifyOperationCount(Env* env, ThreadStatus::OperationType op_type,
                          int expected_count) {
  int op_count = 0;
  std::vector<ThreadStatus> thread_list;
  ASSERT_OK(env->GetThreadList(&thread_list));
  for (auto thread : thread_list) {
    if (thread.operation_type == op_type) {
      op_count++;
    }
  }
  ASSERT_EQ(op_count, expected_count);
}
}  // namespace

I
Igor Sugak 已提交
10708
TEST_F(DBTest, GetThreadStatus) {
Y
Yueh-Hsuan Chiang 已提交
10709 10710
  Options options;
  options.env = env_;
10711 10712
  options.enable_thread_tracking = true;
  TryReopen(options);
Y
Yueh-Hsuan Chiang 已提交
10713 10714

  std::vector<ThreadStatus> thread_list;
10715
  Status s = env_->GetThreadList(&thread_list);
Y
Yueh-Hsuan Chiang 已提交
10716 10717 10718 10719 10720 10721 10722 10723 10724 10725 10726 10727

  for (int i = 0; i < 2; ++i) {
    // repeat the test with differet number of high / low priority threads
    const int kTestCount = 3;
    const unsigned int kHighPriCounts[kTestCount] = {3, 2, 5};
    const unsigned int kLowPriCounts[kTestCount] = {10, 15, 3};
    for (int test = 0; test < kTestCount; ++test) {
      // Change the number of threads in high / low priority pool.
      env_->SetBackgroundThreads(kHighPriCounts[test], Env::HIGH);
      env_->SetBackgroundThreads(kLowPriCounts[test], Env::LOW);
      // Wait to ensure the all threads has been registered
      env_->SleepForMicroseconds(100000);
10728
      s = env_->GetThreadList(&thread_list);
Y
Yueh-Hsuan Chiang 已提交
10729
      ASSERT_OK(s);
10730
      unsigned int thread_type_counts[ThreadStatus::NUM_THREAD_TYPES];
Y
Yueh-Hsuan Chiang 已提交
10731 10732
      memset(thread_type_counts, 0, sizeof(thread_type_counts));
      for (auto thread : thread_list) {
10733
        ASSERT_LT(thread.thread_type, ThreadStatus::NUM_THREAD_TYPES);
Y
Yueh-Hsuan Chiang 已提交
10734 10735 10736 10737
        thread_type_counts[thread.thread_type]++;
      }
      // Verify the total number of threades
      ASSERT_EQ(
10738 10739
          thread_type_counts[ThreadStatus::HIGH_PRIORITY] +
              thread_type_counts[ThreadStatus::LOW_PRIORITY],
Y
Yueh-Hsuan Chiang 已提交
10740 10741 10742
          kHighPriCounts[test] + kLowPriCounts[test]);
      // Verify the number of high-priority threads
      ASSERT_EQ(
10743
          thread_type_counts[ThreadStatus::HIGH_PRIORITY],
Y
Yueh-Hsuan Chiang 已提交
10744 10745 10746
          kHighPriCounts[test]);
      // Verify the number of low-priority threads
      ASSERT_EQ(
10747
          thread_type_counts[ThreadStatus::LOW_PRIORITY],
Y
Yueh-Hsuan Chiang 已提交
10748 10749 10750 10751 10752
          kLowPriCounts[test]);
    }
    if (i == 0) {
      // repeat the test with multiple column families
      CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
10753 10754
      env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(
          handles_, true);
Y
Yueh-Hsuan Chiang 已提交
10755 10756 10757
    }
  }
  db_->DropColumnFamily(handles_[2]);
10758
  delete handles_[2];
Y
Yueh-Hsuan Chiang 已提交
10759
  handles_.erase(handles_.begin() + 2);
10760 10761
  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(
      handles_, true);
Y
Yueh-Hsuan Chiang 已提交
10762
  Close();
10763 10764
  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(
      handles_, true);
10765 10766
}

I
Igor Sugak 已提交
10767
TEST_F(DBTest, DisableThreadStatus) {
10768 10769 10770 10771 10772 10773
  Options options;
  options.env = env_;
  options.enable_thread_tracking = false;
  TryReopen(options);
  CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
  // Verify non of the column family info exists
10774 10775
  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(
      handles_, false);
Y
Yueh-Hsuan Chiang 已提交
10776
}
10777

I
Igor Sugak 已提交
10778
TEST_F(DBTest, ThreadStatusFlush) {
10779 10780 10781 10782 10783 10784 10785
  Options options;
  options.env = env_;
  options.write_buffer_size = 100000;  // Small write buffer
  options.enable_thread_tracking = true;
  options = CurrentOptions(options);

  rocksdb::SyncPoint::GetInstance()->LoadDependency({
10786 10787
      {"FlushJob::FlushJob()", "DBTest::ThreadStatusFlush:1"},
      {"DBTest::ThreadStatusFlush:2", "FlushJob::~FlushJob()"},
10788 10789 10790 10791 10792 10793 10794 10795 10796 10797 10798 10799 10800 10801 10802 10803 10804 10805 10806 10807 10808 10809
  });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  CreateAndReopenWithCF({"pikachu"}, options);
  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);

  ASSERT_OK(Put(1, "foo", "v1"));
  ASSERT_EQ("v1", Get(1, "foo"));
  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);

  Put(1, "k1", std::string(100000, 'x'));  // Fill memtable
  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
  Put(1, "k2", std::string(100000, 'y'));  // Trigger flush
  // wait for flush to be scheduled
  env_->SleepForMicroseconds(250000);
  TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1");
  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1);
  TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2");

  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}

I
Igor Sugak 已提交
10810
TEST_F(DBTest, ThreadStatusSingleCompaction) {
10811 10812 10813 10814 10815 10816 10817 10818 10819 10820 10821 10822 10823 10824 10825 10826 10827
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
  const int kEntriesPerBuffer = 100;
  Options options;
  options.create_if_missing = true;
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.compaction_style = kCompactionStyleLevel;
  options.target_file_size_base = options.write_buffer_size;
  options.max_bytes_for_level_base = options.target_file_size_base * 2;
  options.max_bytes_for_level_multiplier = 2;
  options.compression = kNoCompression;
  options = CurrentOptions(options);
  options.env = env_;
  options.enable_thread_tracking = true;
  const int kNumL0Files = 4;
  options.level0_file_num_compaction_trigger = kNumL0Files;
10828 10829

  rocksdb::SyncPoint::GetInstance()->LoadDependency({
10830 10831 10832
      {"DBTest::ThreadStatusSingleCompaction:0", "DBImpl::BGWorkCompaction"},
      {"CompactionJob::Run():Start", "DBTest::ThreadStatusSingleCompaction:1"},
      {"DBTest::ThreadStatusSingleCompaction:2", "CompactionJob::Run():End"},
10833 10834 10835
  });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

10836
  for (int tests = 0; tests < 2; ++tests) {
10837
    DestroyAndReopen(options);
10838 10839

    Random rnd(301);
10840
    // The Put Phase.
10841 10842 10843 10844 10845 10846
    for (int file = 0; file < kNumL0Files; ++file) {
      for (int key = 0; key < kEntriesPerBuffer; ++key) {
        ASSERT_OK(Put(ToString(key + file * kEntriesPerBuffer),
                      RandomString(&rnd, kTestValueSize)));
      }
      Flush();
10847
    }
10848 10849 10850
    // This makes sure a compaction won't be scheduled until
    // we have done with the above Put Phase.
    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:0");
10851 10852
    ASSERT_GE(NumTableFilesAtLevel(0),
              options.level0_file_num_compaction_trigger);
10853

10854
    // This makes sure at least one compaction is running.
10855
    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:1");
10856

10857 10858
    if (options.enable_thread_tracking) {
      // expecting one single L0 to L1 compaction
10859
      VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 1);
10860 10861
    } else {
      // If thread tracking is not enabled, compaction count should be 0.
10862
      VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 0);
10863
    }
10864
    // TODO(yhchiang): adding assert to verify each compaction stage.
10865
    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:2");
10866 10867 10868 10869

    // repeat the test with disabling thread tracking.
    options.enable_thread_tracking = false;
  }
10870
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
10871 10872
}

I
Igor Sugak 已提交
10873
TEST_F(DBTest, PreShutdownManualCompaction) {
10874 10875 10876 10877 10878 10879 10880 10881 10882 10883 10884 10885 10886 10887 10888 10889 10890 10891 10892 10893 10894 10895 10896 10897 10898 10899 10900 10901 10902 10903 10904 10905 10906 10907 10908 10909 10910 10911 10912 10913 10914 10915 10916 10917 10918 10919 10920 10921 10922 10923
  Options options = CurrentOptions();
  options.max_background_flushes = 0;
  CreateAndReopenWithCF({"pikachu"}, options);
  ASSERT_EQ(dbfull()->MaxMemCompactionLevel(), 2)
      << "Need to update this test to match kMaxMemCompactLevel";

  // iter - 0 with 7 levels
  // iter - 1 with 3 levels
  for (int iter = 0; iter < 2; ++iter) {
    MakeTables(3, "p", "q", 1);
    ASSERT_EQ("1,1,1", FilesPerLevel(1));

    // Compaction range falls before files
    Compact(1, "", "c");
    ASSERT_EQ("1,1,1", FilesPerLevel(1));

    // Compaction range falls after files
    Compact(1, "r", "z");
    ASSERT_EQ("1,1,1", FilesPerLevel(1));

    // Compaction range overlaps files
    Compact(1, "p1", "p9");
    ASSERT_EQ("0,0,1", FilesPerLevel(1));

    // Populate a different range
    MakeTables(3, "c", "e", 1);
    ASSERT_EQ("1,1,2", FilesPerLevel(1));

    // Compact just the new range
    Compact(1, "b", "f");
    ASSERT_EQ("0,0,2", FilesPerLevel(1));

    // Compact all
    MakeTables(1, "a", "z", 1);
    ASSERT_EQ("0,1,2", FilesPerLevel(1));
    CancelAllBackgroundWork(db_);
    db_->CompactRange(handles_[1], nullptr, nullptr);
    ASSERT_EQ("0,1,2", FilesPerLevel(1));

    if (iter == 0) {
      options = CurrentOptions();
      options.max_background_flushes = 0;
      options.num_levels = 3;
      options.create_if_missing = true;
      DestroyAndReopen(options);
      CreateAndReopenWithCF({"pikachu"}, options);
    }
  }
}

I
Igor Sugak 已提交
10924
TEST_F(DBTest, PreShutdownMultipleCompaction) {
10925 10926 10927
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
10928
  const int kEntriesPerBuffer = 40;
10929 10930 10931 10932 10933 10934 10935 10936 10937 10938 10939 10940 10941 10942 10943 10944 10945 10946 10947 10948 10949
  const int kNumL0Files = 4;

  const int kHighPriCount = 3;
  const int kLowPriCount = 5;
  env_->SetBackgroundThreads(kHighPriCount, Env::HIGH);
  env_->SetBackgroundThreads(kLowPriCount, Env::LOW);

  Options options;
  options.create_if_missing = true;
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.compaction_style = kCompactionStyleLevel;
  options.target_file_size_base = options.write_buffer_size;
  options.max_bytes_for_level_base =
      options.target_file_size_base * kNumL0Files;
  options.compression = kNoCompression;
  options = CurrentOptions(options);
  options.env = env_;
  options.enable_thread_tracking = true;
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.max_bytes_for_level_multiplier = 2;
  options.max_background_compactions = kLowPriCount;
10950 10951
  options.level0_stop_writes_trigger = 1 << 10;
  options.level0_slowdown_writes_trigger = 1 << 10;
10952 10953 10954 10955 10956 10957 10958

  TryReopen(options);
  Random rnd(301);

  std::vector<ThreadStatus> thread_list;
  // Delay both flush and compaction
  rocksdb::SyncPoint::GetInstance()->LoadDependency(
10959 10960
      {{"FlushJob::FlushJob()", "CompactionJob::Run():Start"},
       {"CompactionJob::Run():Start",
10961
        "DBTest::PreShutdownMultipleCompaction:Preshutdown"},
10962 10963
        {"CompactionJob::Run():Start",
        "DBTest::PreShutdownMultipleCompaction:VerifyCompaction"},
10964
       {"DBTest::PreShutdownMultipleCompaction:Preshutdown",
10965 10966
        "CompactionJob::Run():End"},
       {"CompactionJob::Run():End",
10967 10968 10969 10970 10971 10972 10973 10974
        "DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown"}});

  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  // Make rocksdb busy
  int key = 0;
  // check how many threads are doing compaction using GetThreadList
  int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
10975
  for (int file = 0; file < 16 * kNumL0Files; ++file) {
10976 10977 10978 10979 10980 10981 10982 10983 10984 10985
    for (int k = 0; k < kEntriesPerBuffer; ++k) {
      ASSERT_OK(Put(ToString(key++), RandomString(&rnd, kTestValueSize)));
    }

    Status s = env_->GetThreadList(&thread_list);
    for (auto thread : thread_list) {
      operation_count[thread.operation_type]++;
    }

    // Speed up the test
10986 10987
    if (operation_count[ThreadStatus::OP_FLUSH] > 1 &&
        operation_count[ThreadStatus::OP_COMPACTION] >
10988 10989 10990
            0.6 * options.max_background_compactions) {
      break;
    }
10991 10992 10993
    if (file == 15 * kNumL0Files) {
      TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
    }
10994 10995 10996
  }

  TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
10997
  ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1);
10998 10999 11000 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011
  CancelAllBackgroundWork(db_);
  TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown");
  dbfull()->TEST_WaitForCompact();
  // Record the number of compactions at a time.
  for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) {
    operation_count[i] = 0;
  }
  Status s = env_->GetThreadList(&thread_list);
  for (auto thread : thread_list) {
    operation_count[thread.operation_type]++;
  }
  ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0);
}

I
Igor Sugak 已提交
11012
TEST_F(DBTest, PreShutdownCompactionMiddle) {
11013 11014 11015
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
11016
  const int kEntriesPerBuffer = 40;
11017 11018 11019 11020 11021 11022 11023 11024 11025 11026 11027 11028 11029 11030 11031 11032 11033 11034 11035 11036 11037
  const int kNumL0Files = 4;

  const int kHighPriCount = 3;
  const int kLowPriCount = 5;
  env_->SetBackgroundThreads(kHighPriCount, Env::HIGH);
  env_->SetBackgroundThreads(kLowPriCount, Env::LOW);

  Options options;
  options.create_if_missing = true;
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.compaction_style = kCompactionStyleLevel;
  options.target_file_size_base = options.write_buffer_size;
  options.max_bytes_for_level_base =
      options.target_file_size_base * kNumL0Files;
  options.compression = kNoCompression;
  options = CurrentOptions(options);
  options.env = env_;
  options.enable_thread_tracking = true;
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.max_bytes_for_level_multiplier = 2;
  options.max_background_compactions = kLowPriCount;
11038 11039
  options.level0_stop_writes_trigger = 1 << 10;
  options.level0_slowdown_writes_trigger = 1 << 10;
11040 11041 11042 11043 11044 11045 11046

  TryReopen(options);
  Random rnd(301);

  std::vector<ThreadStatus> thread_list;
  // Delay both flush and compaction
  rocksdb::SyncPoint::GetInstance()->LoadDependency(
11047
      {{"DBTest::PreShutdownCompactionMiddle:Preshutdown",
11048
        "CompactionJob::Run():Inprogress"},
11049 11050
        {"CompactionJob::Run():Start",
        "DBTest::PreShutdownCompactionMiddle:VerifyCompaction"},
11051 11052
       {"CompactionJob::Run():Inprogress", "CompactionJob::Run():End"},
       {"CompactionJob::Run():End",
11053
        "DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown"}});
11054 11055 11056 11057 11058 11059 11060

  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  // Make rocksdb busy
  int key = 0;
  // check how many threads are doing compaction using GetThreadList
  int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
11061
  for (int file = 0; file < 16 * kNumL0Files; ++file) {
11062 11063 11064 11065 11066 11067 11068 11069 11070 11071
    for (int k = 0; k < kEntriesPerBuffer; ++k) {
      ASSERT_OK(Put(ToString(key++), RandomString(&rnd, kTestValueSize)));
    }

    Status s = env_->GetThreadList(&thread_list);
    for (auto thread : thread_list) {
      operation_count[thread.operation_type]++;
    }

    // Speed up the test
11072 11073
    if (operation_count[ThreadStatus::OP_FLUSH] > 1 &&
        operation_count[ThreadStatus::OP_COMPACTION] >
11074 11075 11076
            0.6 * options.max_background_compactions) {
      break;
    }
11077 11078 11079
    if (file == 15 * kNumL0Files) {
      TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyCompaction");
    }
11080 11081
  }

11082
  ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1);
11083
  CancelAllBackgroundWork(db_);
11084 11085
  TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:Preshutdown");
  TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown");
11086 11087 11088 11089 11090 11091 11092 11093 11094 11095 11096 11097
  dbfull()->TEST_WaitForCompact();
  // Record the number of compactions at a time.
  for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) {
    operation_count[i] = 0;
  }
  Status s = env_->GetThreadList(&thread_list);
  for (auto thread : thread_list) {
    operation_count[thread.operation_type]++;
  }
  ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0);
}

Y
Yueh-Hsuan Chiang 已提交
11098 11099
#endif  // ROCKSDB_USING_THREAD_STATUS

I
Igor Sugak 已提交
11100
TEST_F(DBTest, DynamicLevelMaxBytesBase) {
11101 11102 11103 11104 11105 11106 11107 11108 11109 11110 11111 11112 11113 11114 11115 11116 11117 11118 11119 11120 11121 11122 11123 11124 11125 11126 11127 11128 11129 11130 11131 11132 11133 11134 11135 11136 11137 11138 11139 11140 11141 11142 11143 11144 11145
  // Use InMemoryEnv, or it would be too slow.
  unique_ptr<Env> env(new MockEnv(env_));

  const int kNKeys = 1000;
  int keys[kNKeys];

  auto verify_func = [&]() {
    for (int i = 0; i < kNKeys; i++) {
      ASSERT_NE("NOT_FOUND", Get(Key(i)));
      ASSERT_NE("NOT_FOUND", Get(Key(kNKeys * 2 + i)));
      if (i < kNKeys / 10) {
        ASSERT_EQ("NOT_FOUND", Get(Key(kNKeys + keys[i])));
      } else {
        ASSERT_NE("NOT_FOUND", Get(Key(kNKeys + keys[i])));
      }
    }
  };

  Random rnd(301);
  for (int ordered_insert = 0; ordered_insert <= 1; ordered_insert++) {
    for (int i = 0; i < kNKeys; i++) {
      keys[i] = i;
    }
    if (ordered_insert == 0) {
      std::random_shuffle(std::begin(keys), std::end(keys));
    }
    for (int max_background_compactions = 1; max_background_compactions < 4;
         max_background_compactions += 2) {
      Options options;
      options.env = env.get();
      options.create_if_missing = true;
      options.db_write_buffer_size = 2048;
      options.write_buffer_size = 2048;
      options.max_write_buffer_number = 2;
      options.level0_file_num_compaction_trigger = 2;
      options.level0_slowdown_writes_trigger = 2;
      options.level0_stop_writes_trigger = 2;
      options.target_file_size_base = 2048;
      options.level_compaction_dynamic_level_bytes = true;
      options.max_bytes_for_level_base = 10240;
      options.max_bytes_for_level_multiplier = 4;
      options.hard_rate_limit = 1.1;
      options.max_background_compactions = max_background_compactions;
      options.num_levels = 5;

11146 11147 11148 11149 11150
      options.compression_per_level.resize(3);
      options.compression_per_level[0] = kNoCompression;
      options.compression_per_level[1] = kLZ4Compression;
      options.compression_per_level[2] = kSnappyCompression;

11151 11152 11153 11154 11155 11156 11157 11158 11159 11160 11161 11162 11163 11164 11165 11166 11167 11168 11169 11170 11171 11172 11173 11174 11175 11176 11177 11178 11179 11180 11181 11182 11183 11184 11185 11186 11187 11188 11189 11190 11191 11192 11193 11194
      DestroyAndReopen(options);

      for (int i = 0; i < kNKeys; i++) {
        int key = keys[i];
        ASSERT_OK(Put(Key(kNKeys + key), RandomString(&rnd, 102)));
        ASSERT_OK(Put(Key(key), RandomString(&rnd, 102)));
        ASSERT_OK(Put(Key(kNKeys * 2 + key), RandomString(&rnd, 102)));
        ASSERT_OK(Delete(Key(kNKeys + keys[i / 10])));
        env_->SleepForMicroseconds(5000);
      }

      uint64_t int_prop;
      ASSERT_TRUE(db_->GetIntProperty("rocksdb.background-errors", &int_prop));
      ASSERT_EQ(0U, int_prop);

      // Verify DB
      for (int j = 0; j < 2; j++) {
        verify_func();
        if (j == 0) {
          Reopen(options);
        }
      }

      // Test compact range works
      dbfull()->CompactRange(nullptr, nullptr);
      // All data should be in the last level.
      ColumnFamilyMetaData cf_meta;
      db_->GetColumnFamilyMetaData(&cf_meta);
      ASSERT_EQ(5U, cf_meta.levels.size());
      for (int i = 0; i < 4; i++) {
        ASSERT_EQ(0U, cf_meta.levels[i].files.size());
      }
      ASSERT_GT(cf_meta.levels[4U].files.size(), 0U);
      verify_func();

      Close();
    }
  }

  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
}

// Test specific cases in dynamic max bytes
I
Igor Sugak 已提交
11195
TEST_F(DBTest, DynamicLevelMaxBytesBase2) {
11196 11197 11198 11199 11200 11201 11202 11203 11204 11205 11206 11207 11208 11209 11210 11211 11212 11213 11214 11215 11216 11217 11218 11219 11220 11221 11222 11223 11224 11225 11226 11227 11228 11229 11230 11231 11232 11233 11234 11235 11236 11237 11238 11239 11240 11241 11242 11243 11244 11245 11246 11247 11248 11249 11250 11251 11252 11253 11254 11255 11256 11257 11258 11259
  Random rnd(301);
  int kMaxKey = 1000000;

  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.db_write_buffer_size = 2048;
  options.write_buffer_size = 2048;
  options.max_write_buffer_number = 2;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 9999;
  options.level0_stop_writes_trigger = 9999;
  options.target_file_size_base = 2048;
  options.level_compaction_dynamic_level_bytes = true;
  options.max_bytes_for_level_base = 10240;
  options.max_bytes_for_level_multiplier = 4;
  options.max_background_compactions = 2;
  options.num_levels = 5;
  options.expanded_compaction_factor = 0;  // Force not expanding in compactions
  BlockBasedTableOptions table_options;
  table_options.block_size = 1024;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  DestroyAndReopen(options);
  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "true"},
  }));

  uint64_t int_prop;
  std::string str_prop;

  // Initial base level is the last level
  ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
  ASSERT_EQ(4U, int_prop);

  // Put about 7K to L0
  for (int i = 0; i < 70; i++) {
    ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))),
                  RandomString(&rnd, 80)));
  }
  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "false"},
  }));
  Flush();
  dbfull()->TEST_WaitForCompact();
  ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
  ASSERT_EQ(4U, int_prop);

  // Insert extra about 3.5K to L0. After they are compacted to L4, base level
  // should be changed to L3.
  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "true"},
  }));
  for (int i = 0; i < 70; i++) {
    ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))),
                  RandomString(&rnd, 80)));
  }

  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "false"},
  }));
  Flush();
  dbfull()->TEST_WaitForCompact();
  ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
  ASSERT_EQ(3U, int_prop);
11260 11261 11262
  ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level1", &str_prop));
  ASSERT_EQ("0", str_prop);
  ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level2", &str_prop));
11263 11264 11265 11266 11267 11268
  ASSERT_EQ("0", str_prop);

  // Trigger parallel compaction, and the first one would change the base
  // level.
  // Hold compaction jobs to make sure
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
11269
      "CompactionJob::Run():Start",
11270
      [&](void* arg) { env_->SleepForMicroseconds(100000); });
11271 11272 11273 11274 11275 11276 11277 11278 11279 11280 11281 11282 11283 11284 11285 11286 11287 11288 11289 11290 11291 11292 11293 11294 11295 11296 11297 11298 11299 11300 11301 11302 11303 11304 11305 11306 11307 11308 11309 11310 11311 11312 11313 11314 11315 11316 11317 11318 11319 11320 11321 11322 11323 11324 11325 11326 11327 11328 11329 11330 11331 11332 11333 11334 11335 11336 11337 11338 11339
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "true"},
  }));
  // Write about 10K more
  for (int i = 0; i < 100; i++) {
    ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))),
                  RandomString(&rnd, 80)));
  }
  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "false"},
  }));
  Flush();
  // Wait for 200 milliseconds before proceeding compactions to make sure two
  // parallel ones are executed.
  env_->SleepForMicroseconds(200000);
  dbfull()->TEST_WaitForCompact();
  ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
  ASSERT_EQ(3U, int_prop);
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();

  // Trigger a condition that the compaction changes base level and L0->Lbase
  // happens at the same time.
  // We try to make last levels' targets to be 10K, 40K, 160K, add triggers
  // another compaction from 40K->160K.
  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "true"},
  }));
  // Write about 150K more
  for (int i = 0; i < 1350; i++) {
    ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))),
                  RandomString(&rnd, 80)));
  }
  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "false"},
  }));
  Flush();
  dbfull()->TEST_WaitForCompact();
  ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
  ASSERT_EQ(2U, int_prop);

  // Keep Writing data until base level changed 2->1. There will be L0->L2
  // compaction going on at the same time.
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
  for (int attempt = 0; attempt <= 20; attempt++) {
    // Write about 5K more data with two flushes. It should be flush to level 2
    // but when it is applied, base level is already 1.
    for (int i = 0; i < 50; i++) {
      ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))),
                    RandomString(&rnd, 80)));
    }
    Flush();

    ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
    if (int_prop == 2U) {
      env_->SleepForMicroseconds(50000);
    } else {
      break;
    }
  }
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();

  env_->SleepForMicroseconds(200000);

  ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
  ASSERT_EQ(1U, int_prop);
}

11340 11341 11342 11343 11344 11345 11346 11347 11348 11349 11350 11351 11352 11353 11354 11355 11356 11357 11358 11359 11360 11361 11362 11363 11364 11365 11366 11367 11368 11369 11370 11371 11372 11373 11374 11375 11376 11377 11378 11379 11380 11381 11382 11383
// Test specific cases in dynamic max bytes
TEST_F(DBTest, DynamicLevelMaxBytesCompactRange) {
  Random rnd(301);
  int kMaxKey = 1000000;

  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.db_write_buffer_size = 2048;
  options.write_buffer_size = 2048;
  options.max_write_buffer_number = 2;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 9999;
  options.level0_stop_writes_trigger = 9999;
  options.target_file_size_base = 2;
  options.level_compaction_dynamic_level_bytes = true;
  options.max_bytes_for_level_base = 10240;
  options.max_bytes_for_level_multiplier = 4;
  options.max_background_compactions = 1;
  const int kNumLevels = 5;
  options.num_levels = kNumLevels;
  options.expanded_compaction_factor = 0;  // Force not expanding in compactions
  BlockBasedTableOptions table_options;
  table_options.block_size = 1024;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  DestroyAndReopen(options);

  // Compact against empty DB
  dbfull()->CompactRange(nullptr, nullptr);

  uint64_t int_prop;
  std::string str_prop;

  // Initial base level is the last level
  ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
  ASSERT_EQ(4U, int_prop);

  // Put about 7K to L0
  for (int i = 0; i < 140; i++) {
    ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))),
                  RandomString(&rnd, 80)));
  }
  Flush();
  dbfull()->TEST_WaitForCompact();
11384 11385 11386 11387 11388 11389
  if (NumTableFilesAtLevel(0) == 0) {
    // Make sure level 0 is not empty
    ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))),
                  RandomString(&rnd, 80)));
    Flush();
  }
11390 11391 11392 11393 11394 11395 11396 11397 11398 11399 11400 11401 11402 11403 11404 11405 11406 11407 11408 11409 11410 11411 11412 11413 11414 11415 11416 11417 11418 11419 11420 11421

  ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
  ASSERT_EQ(3U, int_prop);
  ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level1", &str_prop));
  ASSERT_EQ("0", str_prop);
  ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level2", &str_prop));
  ASSERT_EQ("0", str_prop);

  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();

  std::set<int> output_levels;
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "CompactionPicker::CompactRange:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        output_levels.insert(compaction->output_level());
      });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  dbfull()->CompactRange(nullptr, nullptr);
  ASSERT_EQ(output_levels.size(), 2);
  ASSERT_TRUE(output_levels.find(3) != output_levels.end());
  ASSERT_TRUE(output_levels.find(4) != output_levels.end());
  ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &str_prop));
  ASSERT_EQ("0", str_prop);
  ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level3", &str_prop));
  ASSERT_EQ("0", str_prop);
  // Base level is still level 3.
  ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop));
  ASSERT_EQ(3U, int_prop);
}

11422 11423 11424 11425 11426 11427 11428 11429 11430 11431 11432 11433 11434 11435 11436 11437 11438 11439 11440 11441 11442
TEST_F(DBTest, DynamicLevelMaxBytesBaseInc) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.db_write_buffer_size = 2048;
  options.write_buffer_size = 2048;
  options.max_write_buffer_number = 2;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 2;
  options.target_file_size_base = 2048;
  options.level_compaction_dynamic_level_bytes = true;
  options.max_bytes_for_level_base = 10240;
  options.max_bytes_for_level_multiplier = 4;
  options.hard_rate_limit = 1.1;
  options.max_background_compactions = 2;
  options.num_levels = 5;

  DestroyAndReopen(options);

  int non_trivial = 0;
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
11443 11444
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* arg) { non_trivial++; });
11445 11446 11447
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  Random rnd(301);
S
sdong 已提交
11448 11449 11450 11451 11452 11453
  const int total_keys = 3000;
  const int random_part_size = 100;
  for (int i = 0; i < total_keys; i++) {
    std::string value = RandomString(&rnd, random_part_size);
    PutFixed32(&value, static_cast<uint32_t>(i));
    ASSERT_OK(Put(Key(i), value));
11454 11455 11456 11457 11458 11459 11460
  }
  Flush();
  dbfull()->TEST_WaitForCompact();
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();

  ASSERT_EQ(non_trivial, 0);

S
sdong 已提交
11461 11462 11463 11464 11465 11466
  for (int i = 0; i < total_keys; i++) {
    std::string value = Get(Key(i));
    ASSERT_EQ(DecodeFixed32(value.c_str() + random_part_size),
              static_cast<uint32_t>(i));
  }

11467 11468 11469 11470
  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
}

11471 11472 11473 11474 11475 11476 11477 11478 11479 11480 11481 11482 11483 11484 11485 11486 11487 11488 11489 11490 11491
TEST_F(DBTest, MigrateToDynamicLevelMaxBytesBase) {
  Random rnd(301);
  const int kMaxKey = 2000;

  Options options;
  options.create_if_missing = true;
  options.db_write_buffer_size = 2048;
  options.write_buffer_size = 2048;
  options.max_write_buffer_number = 8;
  options.level0_file_num_compaction_trigger = 4;
  options.level0_slowdown_writes_trigger = 4;
  options.level0_stop_writes_trigger = 8;
  options.target_file_size_base = 2048;
  options.level_compaction_dynamic_level_bytes = false;
  options.max_bytes_for_level_base = 10240;
  options.max_bytes_for_level_multiplier = 4;
  options.hard_rate_limit = 1.1;
  options.num_levels = 8;

  DestroyAndReopen(options);

11492
  auto verify_func = [&](int num_keys, bool if_sleep) {
11493 11494 11495 11496 11497 11498 11499
    for (int i = 0; i < num_keys; i++) {
      ASSERT_NE("NOT_FOUND", Get(Key(kMaxKey + i)));
      if (i < num_keys / 10) {
        ASSERT_EQ("NOT_FOUND", Get(Key(i)));
      } else {
        ASSERT_NE("NOT_FOUND", Get(Key(i)));
      }
11500 11501 11502 11503 11504 11505
      if (if_sleep && i % 1000 == 0) {
        // Without it, valgrind may choose not to give another
        // thread a chance to run before finishing the function,
        // causing the test to be extremely slow.
        env_->SleepForMicroseconds(1);
      }
11506 11507 11508 11509 11510 11511 11512 11513 11514
    }
  };

  int total_keys = 1000;
  for (int i = 0; i < total_keys; i++) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 102)));
    ASSERT_OK(Put(Key(kMaxKey + i), RandomString(&rnd, 102)));
    ASSERT_OK(Delete(Key(i / 10)));
  }
11515
  verify_func(total_keys, false);
11516 11517 11518 11519 11520
  dbfull()->TEST_WaitForCompact();

  options.level_compaction_dynamic_level_bytes = true;
  options.disable_auto_compactions = true;
  Reopen(options);
11521
  verify_func(total_keys, false);
11522 11523 11524 11525 11526 11527 11528 11529 11530

  std::atomic_bool compaction_finished(false);
  // Issue manual compaction in one thread and still verify DB state
  // in main thread.
  std::thread t([&]() {
    dbfull()->CompactRange(nullptr, nullptr, true, options.num_levels - 1);
    compaction_finished.store(true);
  });
  do {
11531
    verify_func(total_keys, true);
11532 11533 11534 11535 11536 11537 11538 11539 11540 11541 11542 11543 11544 11545
  } while (!compaction_finished.load());
  t.join();

  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "false"},
  }));

  int total_keys2 = 2000;
  for (int i = total_keys; i < total_keys2; i++) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 102)));
    ASSERT_OK(Put(Key(kMaxKey + i), RandomString(&rnd, 102)));
    ASSERT_OK(Delete(Key(i / 10)));
  }

11546
  verify_func(total_keys2, false);
11547
  dbfull()->TEST_WaitForCompact();
11548
  verify_func(total_keys2, false);
11549 11550 11551 11552 11553

  // Base level is not level 1
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
}
11554

11555 11556 11557 11558 11559 11560 11561 11562 11563 11564 11565 11566 11567 11568 11569 11570 11571 11572 11573 11574 11575 11576 11577 11578 11579 11580 11581 11582 11583 11584 11585 11586
namespace {
class OnFileDeletionListener : public EventListener {
 public:
  OnFileDeletionListener() :
      matched_count_(0),
      expected_file_name_("") {}

  void SetExpectedFileName(
      const std::string file_name) {
    expected_file_name_ = file_name;
  }

  void VerifyMatchedCount(size_t expected_value) {
    ASSERT_EQ(matched_count_, expected_value);
  }

  void OnTableFileDeleted(
      const TableFileDeletionInfo& info) override {
    if (expected_file_name_ != "") {
      ASSERT_EQ(expected_file_name_, info.file_path);
      expected_file_name_ = "";
      matched_count_++;
    }
  }

 private:
  size_t matched_count_;
  std::string expected_file_name_;
};

}  // namespace

I
Igor Sugak 已提交
11587
TEST_F(DBTest, DynamicLevelCompressionPerLevel) {
I
Igor Canadi 已提交
11588
  if (!Snappy_Supported()) {
11589 11590
    return;
  }
11591 11592 11593 11594 11595 11596 11597 11598 11599 11600 11601 11602 11603 11604 11605 11606 11607 11608 11609 11610 11611 11612 11613 11614 11615 11616 11617 11618
  const int kNKeys = 120;
  int keys[kNKeys];
  for (int i = 0; i < kNKeys; i++) {
    keys[i] = i;
  }
  std::random_shuffle(std::begin(keys), std::end(keys));

  Random rnd(301);
  Options options;
  options.create_if_missing = true;
  options.db_write_buffer_size = 20480;
  options.write_buffer_size = 20480;
  options.max_write_buffer_number = 2;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 2;
  options.target_file_size_base = 2048;
  options.level_compaction_dynamic_level_bytes = true;
  options.max_bytes_for_level_base = 102400;
  options.max_bytes_for_level_multiplier = 4;
  options.max_background_compactions = 1;
  options.num_levels = 5;

  options.compression_per_level.resize(3);
  options.compression_per_level[0] = kNoCompression;
  options.compression_per_level[1] = kNoCompression;
  options.compression_per_level[2] = kSnappyCompression;

11619 11620 11621
  OnFileDeletionListener* listener = new OnFileDeletionListener();
  options.listeners.emplace_back(listener);

11622 11623 11624 11625 11626 11627 11628 11629 11630 11631 11632 11633 11634 11635 11636 11637 11638 11639 11640 11641 11642 11643 11644 11645 11646 11647 11648 11649 11650 11651 11652 11653
  DestroyAndReopen(options);

  // Insert more than 80K. L4 should be base level. Neither L0 nor L4 should
  // be compressed, so total data size should be more than 80K.
  for (int i = 0; i < 20; i++) {
    ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
  }
  Flush();
  dbfull()->TEST_WaitForCompact();

  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  ASSERT_EQ(NumTableFilesAtLevel(3), 0);
  ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(4), 20U * 4000U);

  // Insert 400KB. Some data will be compressed
  for (int i = 21; i < 120; i++) {
    ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
  }
  Flush();
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  ASSERT_LT(SizeAtLevel(0) + SizeAtLevel(3) + SizeAtLevel(4), 120U * 4000U);
  // Make sure data in files in L3 is not compacted by removing all files
  // in L4 and calculate number of rows
  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "true"},
  }));
  ColumnFamilyMetaData cf_meta;
  db_->GetColumnFamilyMetaData(&cf_meta);
  for (auto file : cf_meta.levels[4].files) {
11654
    listener->SetExpectedFileName(dbname_ + file.name);
11655 11656
    ASSERT_OK(dbfull()->DeleteFile(file.name));
  }
11657 11658
  listener->VerifyMatchedCount(cf_meta.levels[4].files.size());

11659 11660 11661 11662 11663 11664 11665 11666 11667
  int num_keys = 0;
  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    num_keys++;
  }
  ASSERT_OK(iter->status());
  ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(3), num_keys * 4000U);
}

I
Igor Sugak 已提交
11668
TEST_F(DBTest, DynamicLevelCompressionPerLevel2) {
11669 11670 11671 11672 11673 11674 11675 11676 11677 11678 11679 11680 11681 11682 11683 11684 11685 11686 11687 11688 11689 11690 11691 11692 11693 11694 11695 11696 11697 11698 11699 11700 11701 11702 11703 11704 11705 11706 11707
  const int kNKeys = 500;
  int keys[kNKeys];
  for (int i = 0; i < kNKeys; i++) {
    keys[i] = i;
  }
  std::random_shuffle(std::begin(keys), std::end(keys));

  Random rnd(301);
  Options options;
  options.create_if_missing = true;
  options.db_write_buffer_size = 6000;
  options.write_buffer_size = 6000;
  options.max_write_buffer_number = 2;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 2;
  options.hard_rate_limit = 1.1;

  // Use file size to distinguish levels
  // L1: 10, L2: 20, L3 40, L4 80
  // L0 is less than 30
  options.target_file_size_base = 10;
  options.target_file_size_multiplier = 2;

  options.level_compaction_dynamic_level_bytes = true;
  options.max_bytes_for_level_base = 200;
  options.max_bytes_for_level_multiplier = 8;
  options.max_background_compactions = 1;
  options.num_levels = 5;
  std::shared_ptr<mock::MockTableFactory> mtf(new mock::MockTableFactory);
  options.table_factory = mtf;

  options.compression_per_level.resize(3);
  options.compression_per_level[0] = kNoCompression;
  options.compression_per_level[1] = kLZ4Compression;
  options.compression_per_level[2] = kZlibCompression;

  DestroyAndReopen(options);
  // When base level is L4, L4 is LZ4.
11708 11709 11710
  std::atomic<int> num_zlib(0);
  std::atomic<int> num_lz4(0);
  std::atomic<int> num_no(0);
11711 11712 11713 11714 11715
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        if (compaction->output_level() == 4) {
          ASSERT_TRUE(compaction->OutputCompressionType() == kLZ4Compression);
11716
          num_lz4.fetch_add(1);
11717 11718
        }
      });
11719 11720 11721 11722 11723 11724
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
        auto* compression = reinterpret_cast<CompressionType*>(arg);
        ASSERT_TRUE(*compression == kNoCompression);
        num_no.fetch_add(1);
      });
11725 11726
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

11727 11728 11729 11730 11731
  for (int i = 0; i < 100; i++) {
    ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 200)));
  }
  Flush();
  dbfull()->TEST_WaitForCompact();
11732 11733
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
11734 11735 11736 11737

  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  ASSERT_EQ(NumTableFilesAtLevel(3), 0);
11738
  ASSERT_GT(NumTableFilesAtLevel(4), 0);
11739 11740
  ASSERT_GT(num_no.load(), 2);
  ASSERT_GT(num_lz4.load(), 0);
11741
  int prev_num_files_l4 = NumTableFilesAtLevel(4);
11742 11743

  // After base level turn L4->L3, L3 becomes LZ4 and L4 becomes Zlib
11744 11745
  num_lz4.store(0);
  num_no.store(0);
11746 11747 11748 11749 11750
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        if (compaction->output_level() == 4 && compaction->start_level() == 3) {
          ASSERT_TRUE(compaction->OutputCompressionType() == kZlibCompression);
11751
          num_zlib.fetch_add(1);
11752 11753
        } else {
          ASSERT_TRUE(compaction->OutputCompressionType() == kLZ4Compression);
11754
          num_lz4.fetch_add(1);
11755 11756
        }
      });
11757 11758 11759 11760 11761 11762 11763
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
        auto* compression = reinterpret_cast<CompressionType*>(arg);
        ASSERT_TRUE(*compression == kNoCompression);
        num_no.fetch_add(1);
      });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
11764

11765 11766 11767 11768 11769 11770 11771
  for (int i = 101; i < 500; i++) {
    ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 200)));
    if (i % 100 == 99) {
      Flush();
      dbfull()->TEST_WaitForCompact();
    }
  }
11772 11773 11774

  rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
11775 11776
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
11777 11778
  ASSERT_GT(NumTableFilesAtLevel(3), 0);
  ASSERT_GT(NumTableFilesAtLevel(4), prev_num_files_l4);
11779 11780 11781
  ASSERT_GT(num_no.load(), 2);
  ASSERT_GT(num_lz4.load(), 0);
  ASSERT_GT(num_zlib.load(), 0);
11782 11783
}

I
Igor Sugak 已提交
11784
TEST_F(DBTest, DynamicCompactionOptions) {
11785 11786
  // minimum write buffer size is enforced at 64KB
  const uint64_t k32KB = 1 << 15;
11787 11788
  const uint64_t k64KB = 1 << 16;
  const uint64_t k128KB = 1 << 17;
11789
  const uint64_t k1MB = 1 << 20;
11790
  const uint64_t k4KB = 1 << 12;
11791 11792 11793 11794 11795
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.compression = kNoCompression;
  options.hard_rate_limit = 1.1;
11796
  options.write_buffer_size = k64KB;
11797 11798 11799
  options.max_write_buffer_number = 2;
  // Compaction related options
  options.level0_file_num_compaction_trigger = 3;
L
Lei Jin 已提交
11800 11801
  options.level0_slowdown_writes_trigger = 4;
  options.level0_stop_writes_trigger = 8;
11802 11803 11804
  options.max_grandparent_overlap_factor = 10;
  options.expanded_compaction_factor = 25;
  options.source_compaction_factor = 1;
11805
  options.target_file_size_base = k64KB;
11806
  options.target_file_size_multiplier = 1;
11807
  options.max_bytes_for_level_base = k128KB;
11808
  options.max_bytes_for_level_multiplier = 4;
L
Lei Jin 已提交
11809 11810 11811 11812

  // Block flush thread and disable compaction thread
  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
L
Lei Jin 已提交
11813
  DestroyAndReopen(options);
11814

11815
  auto gen_l0_kb = [this](int start, int size, int stride) {
11816 11817
    Random rnd(301);
    for (int i = 0; i < size; i++) {
L
Lei Jin 已提交
11818
      ASSERT_OK(Put(Key(start + stride * i), RandomString(&rnd, 1024)));
11819 11820 11821 11822
    }
    dbfull()->TEST_WaitForFlushMemTable();
  };

11823 11824 11825
  // Write 3 files that have the same key range.
  // Since level0_file_num_compaction_trigger is 3, compaction should be
  // triggered. The compaction should result in one L1 file
11826
  gen_l0_kb(0, 64, 1);
11827
  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
11828
  gen_l0_kb(0, 64, 1);
11829
  ASSERT_EQ(NumTableFilesAtLevel(0), 2);
11830
  gen_l0_kb(0, 64, 1);
11831 11832 11833 11834
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ("0,1", FilesPerLevel());
  std::vector<LiveFileMetaData> metadata;
  db_->GetLiveFilesMetaData(&metadata);
11835
  ASSERT_EQ(1U, metadata.size());
11836 11837
  ASSERT_LE(metadata[0].size, k64KB + k4KB);
  ASSERT_GE(metadata[0].size, k64KB - k4KB);
11838

11839
  // Test compaction trigger and target_file_size_base
11840 11841 11842 11843
  // Reduce compaction trigger to 2, and reduce L1 file size to 32KB.
  // Writing to 64KB L0 files should trigger a compaction. Since these
  // 2 L0 files have the same key range, compaction merge them and should
  // result in 2 32KB L1 files.
11844
  ASSERT_OK(dbfull()->SetOptions({
11845
    {"level0_file_num_compaction_trigger", "2"},
11846
    {"target_file_size_base", ToString(k32KB) }
11847 11848
  }));

11849
  gen_l0_kb(0, 64, 1);
11850
  ASSERT_EQ("1,1", FilesPerLevel());
11851
  gen_l0_kb(0, 64, 1);
11852 11853 11854 11855
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ("0,2", FilesPerLevel());
  metadata.clear();
  db_->GetLiveFilesMetaData(&metadata);
11856
  ASSERT_EQ(2U, metadata.size());
11857 11858
  ASSERT_LE(metadata[0].size, k32KB + k4KB);
  ASSERT_GE(metadata[0].size, k32KB - k4KB);
11859 11860
  ASSERT_LE(metadata[1].size, k32KB + k4KB);
  ASSERT_GE(metadata[1].size, k32KB - k4KB);
11861

11862
  // Test max_bytes_for_level_base
11863 11864 11865
  // Increase level base size to 256KB and write enough data that will
  // fill L1 and L2. L1 size should be around 256KB while L2 size should be
  // around 256KB x 4.
11866
  ASSERT_OK(dbfull()->SetOptions({
11867
    {"max_bytes_for_level_base", ToString(k1MB) }
11868
  }));
11869

11870 11871 11872 11873
  // writing 96 x 64KB => 6 * 1024KB
  // (L1 + L2) = (1 + 4) * 1024KB
  for (int i = 0; i < 96; ++i) {
    gen_l0_kb(i, 64, 96);
11874 11875
  }
  dbfull()->TEST_WaitForCompact();
11876 11877 11878 11879 11880 11881
  ASSERT_GT(SizeAtLevel(1), k1MB / 2);
  ASSERT_LT(SizeAtLevel(1), k1MB + k1MB / 2);

  // Within (0.5, 1.5) of 4MB.
  ASSERT_GT(SizeAtLevel(2), 2 * k1MB);
  ASSERT_LT(SizeAtLevel(2), 6 * k1MB);
11882

11883
  // Test max_bytes_for_level_multiplier and
11884 11885 11886
  // max_bytes_for_level_base. Now, reduce both mulitplier and level base,
  // After filling enough data that can fit in L1 - L3, we should see L1 size
  // reduces to 128KB from 256KB which was asserted previously. Same for L2.
11887
  ASSERT_OK(dbfull()->SetOptions({
11888
    {"max_bytes_for_level_multiplier", "2"},
11889
    {"max_bytes_for_level_base", ToString(k128KB) }
11890 11891
  }));

11892 11893 11894 11895
  // writing 20 x 64KB = 10 x 128KB
  // (L1 + L2 + L3) = (1 + 2 + 4) * 128KB
  for (int i = 0; i < 20; ++i) {
    gen_l0_kb(i, 64, 32);
11896 11897
  }
  dbfull()->TEST_WaitForCompact();
11898 11899 11900
  uint64_t total_size =
    SizeAtLevel(1) + SizeAtLevel(2) + SizeAtLevel(3);
  ASSERT_TRUE(total_size < k128KB * 7 * 1.5);
L
Lei Jin 已提交
11901

11902 11903 11904 11905
  // Test level0_stop_writes_trigger.
  // Clean up memtable and L0. Block compaction threads. If continue to write
  // and flush memtables. We should see put timeout after 8 memtable flushes
  // since level0_stop_writes_trigger = 8
L
Lei Jin 已提交
11906 11907 11908 11909 11910 11911 11912 11913 11914 11915 11916 11917 11918 11919
  dbfull()->CompactRange(nullptr, nullptr);
  // Block compaction
  SleepingBackgroundTask sleeping_task_low1;
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_low1,
                 Env::Priority::LOW);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  int count = 0;
  Random rnd(301);
  WriteOptions wo;
  wo.timeout_hint_us = 10000;
  while (Put(Key(count), RandomString(&rnd, 1024), wo).ok() && count < 64) {
    dbfull()->TEST_FlushMemTable(true);
    count++;
  }
11920
  // Stop trigger = 8
L
Lei Jin 已提交
11921 11922 11923 11924 11925
  ASSERT_EQ(count, 8);
  // Unblock
  sleeping_task_low1.WakeUp();
  sleeping_task_low1.WaitUntilDone();

11926 11927 11928
  // Now reduce level0_stop_writes_trigger to 6. Clear up memtables and L0.
  // Block compaction thread again. Perform the put and memtable flushes
  // until we see timeout after 6 memtable flushes.
11929
  ASSERT_OK(dbfull()->SetOptions({
L
Lei Jin 已提交
11930 11931 11932 11933 11934 11935 11936 11937 11938 11939 11940 11941 11942 11943 11944 11945 11946 11947 11948 11949
    {"level0_stop_writes_trigger", "6"}
  }));
  dbfull()->CompactRange(nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  // Block compaction
  SleepingBackgroundTask sleeping_task_low2;
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_low2,
                 Env::Priority::LOW);
  count = 0;
  while (Put(Key(count), RandomString(&rnd, 1024), wo).ok() && count < 64) {
    dbfull()->TEST_FlushMemTable(true);
    count++;
  }
  ASSERT_EQ(count, 6);
  // Unblock
  sleeping_task_low2.WakeUp();
  sleeping_task_low2.WaitUntilDone();

  // Test disable_auto_compactions
11950 11951 11952 11953
  // Compaction thread is unblocked but auto compaction is disabled. Write
  // 4 L0 files and compaction should be triggered. If auto compaction is
  // disabled, then TEST_WaitForCompact will be waiting for nothing. Number of
  // L0 files do not change after the call.
11954
  ASSERT_OK(dbfull()->SetOptions({
L
Lei Jin 已提交
11955 11956 11957 11958 11959 11960 11961 11962 11963 11964 11965 11966 11967
    {"disable_auto_compactions", "true"}
  }));
  dbfull()->CompactRange(nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  for (int i = 0; i < 4; ++i) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
    // Wait for compaction so that put won't timeout
    dbfull()->TEST_FlushMemTable(true);
  }
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(NumTableFilesAtLevel(0), 4);

11968 11969
  // Enable auto compaction and perform the same test, # of L0 files should be
  // reduced after compaction.
11970
  ASSERT_OK(dbfull()->SetOptions({
L
Lei Jin 已提交
11971 11972 11973 11974 11975 11976 11977 11978 11979 11980 11981 11982
    {"disable_auto_compactions", "false"}
  }));
  dbfull()->CompactRange(nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  for (int i = 0; i < 4; ++i) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
    // Wait for compaction so that put won't timeout
    dbfull()->TEST_FlushMemTable(true);
  }
  dbfull()->TEST_WaitForCompact();
  ASSERT_LT(NumTableFilesAtLevel(0), 4);
11983

11984 11985 11986 11987
  // Test for hard_rate_limit.
  // First change max_bytes_for_level_base to a big value and populate
  // L1 - L3. Then thrink max_bytes_for_level_base and disable auto compaction
  // at the same time, we should see some level with score greater than 2.
11988
  ASSERT_OK(dbfull()->SetOptions({
11989
    {"max_bytes_for_level_base", ToString(k1MB) }
11990 11991 11992 11993 11994 11995 11996
  }));
  // writing 40 x 64KB = 10 x 256KB
  // (L1 + L2 + L3) = (1 + 2 + 4) * 256KB
  for (int i = 0; i < 40; ++i) {
    gen_l0_kb(i, 64, 32);
  }
  dbfull()->TEST_WaitForCompact();
11997 11998 11999 12000 12001 12002
  ASSERT_TRUE((SizeAtLevel(1) > k1MB * 0.8 &&
               SizeAtLevel(1) < k1MB * 1.2) ||
              (SizeAtLevel(2) > 2 * k1MB * 0.8 &&
               SizeAtLevel(2) < 2 * k1MB * 1.2) ||
              (SizeAtLevel(3) > 4 * k1MB * 0.8 &&
               SizeAtLevel(3) < 4 * k1MB * 1.2));
12003 12004
  // Reduce max_bytes_for_level_base and disable compaction at the same time
  // This should cause score to increase
12005
  ASSERT_OK(dbfull()->SetOptions({
12006 12007 12008 12009 12010 12011 12012 12013 12014 12015 12016
    {"disable_auto_compactions", "true"},
    {"max_bytes_for_level_base", "65536"},
  }));
  ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024)));
  dbfull()->TEST_FlushMemTable(true);

  // Check score is above 2
  ASSERT_TRUE(SizeAtLevel(1) / k64KB > 2 ||
              SizeAtLevel(2) / k64KB > 4 ||
              SizeAtLevel(3) / k64KB > 8);

12017 12018 12019
  // Enfoce hard rate limit. Now set hard_rate_limit to 2,
  // we should start to see put delay (1000 us) and timeout as a result
  // (L0 score is not regulated by this limit).
12020
  ASSERT_OK(dbfull()->SetOptions({
12021 12022 12023
    {"hard_rate_limit", "2"},
    {"level0_slowdown_writes_trigger", "18"},
    {"level0_stop_writes_trigger", "20"}
12024 12025 12026 12027
  }));
  ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024)));
  dbfull()->TEST_FlushMemTable(true);

S
sdong 已提交
12028 12029
  std::atomic<int> sleep_count(0);
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
12030
      "DBImpl::DelayWrite:Sleep", [&](void* arg) { sleep_count.fetch_add(1); });
S
sdong 已提交
12031 12032
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

12033
  // Hard rate limit slow down for 1000 us, so default 10ms should be ok
S
sdong 已提交
12034 12035 12036 12037
  ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
  sleep_count.store(0);
  ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
  ASSERT_GT(sleep_count.load(), 0);
12038

12039
  // Lift the limit and no timeout
12040
  ASSERT_OK(dbfull()->SetOptions({
12041
    {"hard_rate_limit", "200"},
12042 12043
  }));
  dbfull()->TEST_FlushMemTable(true);
S
sdong 已提交
12044 12045 12046 12047 12048 12049
  sleep_count.store(0);
  ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
  // Technically, time out is still possible for timing issue.
  ASSERT_EQ(sleep_count.load(), 0);
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();

L
Lei Jin 已提交
12050 12051

  // Test max_mem_compaction_level.
C
clark.kang 已提交
12052
  // Destroy DB and start from scratch
L
Lei Jin 已提交
12053 12054 12055
  options.max_background_compactions = 1;
  options.max_background_flushes = 0;
  options.max_mem_compaction_level = 2;
L
Lei Jin 已提交
12056
  DestroyAndReopen(options);
L
Lei Jin 已提交
12057 12058 12059 12060
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);

S
sdong 已提交
12061
  ASSERT_OK(Put("max_mem_compaction_level_key", RandomString(&rnd, 8)));
L
Lei Jin 已提交
12062 12063 12064 12065 12066 12067 12068 12069
  dbfull()->TEST_FlushMemTable(true);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 1);

  ASSERT_TRUE(Put("max_mem_compaction_level_key",
              RandomString(&rnd, 8)).ok());
  // Set new value and it becomes effective in this flush
12070
  ASSERT_OK(dbfull()->SetOptions({
L
Lei Jin 已提交
12071 12072 12073 12074 12075 12076 12077 12078 12079 12080
    {"max_mem_compaction_level", "1"}
  }));
  dbfull()->TEST_FlushMemTable(true);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
  ASSERT_EQ(NumTableFilesAtLevel(2), 1);

  ASSERT_TRUE(Put("max_mem_compaction_level_key",
              RandomString(&rnd, 8)).ok());
  // Set new value and it becomes effective in this flush
12081
  ASSERT_OK(dbfull()->SetOptions({
L
Lei Jin 已提交
12082 12083 12084 12085 12086 12087
    {"max_mem_compaction_level", "0"}
  }));
  dbfull()->TEST_FlushMemTable(true);
  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
  ASSERT_EQ(NumTableFilesAtLevel(2), 1);
12088
}
12089

I
Igor Sugak 已提交
12090
TEST_F(DBTest, FileCreationRandomFailure) {
12091 12092 12093 12094 12095 12096 12097 12098
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.write_buffer_size = 100000;  // Small write buffer
  options.target_file_size_base = 200000;
  options.max_bytes_for_level_base = 1000000;
  options.max_bytes_for_level_multiplier = 2;

L
Lei Jin 已提交
12099
  DestroyAndReopen(options);
12100 12101 12102 12103 12104 12105 12106 12107 12108 12109 12110 12111 12112 12113 12114 12115 12116 12117 12118 12119 12120 12121 12122 12123 12124 12125 12126 12127 12128 12129 12130 12131 12132 12133 12134 12135 12136 12137 12138 12139 12140 12141
  Random rnd(301);

  const int kTestSize = kCDTKeysPerBuffer * 4096;
  const int kTotalIteration = 100;
  // the second half of the test involves in random failure
  // of file creation.
  const int kRandomFailureTest = kTotalIteration / 2;
  std::vector<std::string> values;
  for (int i = 0; i < kTestSize; ++i) {
    values.push_back("NOT_FOUND");
  }
  for (int j = 0; j < kTotalIteration; ++j) {
    if (j == kRandomFailureTest) {
      env_->non_writeable_rate_.store(90);
    }
    for (int k = 0; k < kTestSize; ++k) {
      // here we expect some of the Put fails.
      std::string value = RandomString(&rnd, 100);
      Status s = Put(Key(k), Slice(value));
      if (s.ok()) {
        // update the latest successful put
        values[k] = value;
      }
      // But everything before we simulate the failure-test should succeed.
      if (j < kRandomFailureTest) {
        ASSERT_OK(s);
      }
    }
  }

  // If rocksdb does not do the correct job, internal assert will fail here.
  dbfull()->TEST_WaitForFlushMemTable();
  dbfull()->TEST_WaitForCompact();

  // verify we have the latest successful update
  for (int k = 0; k < kTestSize; ++k) {
    auto v = Get(Key(k));
    ASSERT_EQ(v, values[k]);
  }

  // reopen and reverify we have the latest successful update
  env_->non_writeable_rate_.store(0);
L
Lei Jin 已提交
12142
  Reopen(options);
12143 12144 12145 12146 12147 12148
  for (int k = 0; k < kTestSize; ++k) {
    auto v = Get(Key(k));
    ASSERT_EQ(v, values[k]);
  }
}

I
Igor Sugak 已提交
12149
TEST_F(DBTest, PartialCompactionFailure) {
12150 12151 12152 12153 12154 12155 12156 12157 12158 12159 12160 12161 12162 12163 12164 12165 12166 12167
  Options options;
  const int kKeySize = 16;
  const int kKvSize = 1000;
  const int kKeysPerBuffer = 100;
  const int kNumL1Files = 5;
  options.create_if_missing = true;
  options.write_buffer_size = kKeysPerBuffer * kKvSize;
  options.max_write_buffer_number = 2;
  options.target_file_size_base =
      options.write_buffer_size *
      (options.max_write_buffer_number - 1);
  options.level0_file_num_compaction_trigger = kNumL1Files;
  options.max_bytes_for_level_base =
      options.level0_file_num_compaction_trigger *
      options.target_file_size_base;
  options.max_bytes_for_level_multiplier = 2;
  options.compression = kNoCompression;

12168 12169 12170 12171 12172 12173 12174
  env_->SetBackgroundThreads(1, Env::HIGH);
  env_->SetBackgroundThreads(1, Env::LOW);
  // stop the compaction thread until we simulate the file creation failure.
  SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

12175 12176
  options.env = env_;

L
Lei Jin 已提交
12177
  DestroyAndReopen(options);
12178

I
Igor Canadi 已提交
12179
  const int kNumInsertedKeys =
12180 12181
      options.level0_file_num_compaction_trigger *
      (options.max_write_buffer_number - 1) *
12182
      kKeysPerBuffer;
12183 12184 12185 12186

  Random rnd(301);
  std::vector<std::string> keys;
  std::vector<std::string> values;
I
Igor Canadi 已提交
12187
  for (int k = 0; k < kNumInsertedKeys; ++k) {
12188 12189 12190 12191 12192
    keys.emplace_back(RandomString(&rnd, kKeySize));
    values.emplace_back(RandomString(&rnd, kKvSize - kKeySize));
    ASSERT_OK(Put(Slice(keys[k]), Slice(values[k])));
  }

12193 12194 12195 12196 12197
  dbfull()->TEST_FlushMemTable(true);
  // Make sure the number of L0 files can trigger compaction.
  ASSERT_GE(NumTableFilesAtLevel(0),
            options.level0_file_num_compaction_trigger);

12198
  auto previous_num_level0_files = NumTableFilesAtLevel(0);
12199

12200 12201 12202 12203
  // Fail the first file creation.
  env_->non_writable_count_ = 1;
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
12204

12205 12206
  // Expect compaction to fail here as one file will fail its
  // creation.
12207
  ASSERT_TRUE(!dbfull()->TEST_WaitForCompact().ok());
12208

12209 12210
  // Verify L0 -> L1 compaction does fail.
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
12211

12212
  // Verify all L0 files are still there.
12213
  ASSERT_EQ(NumTableFilesAtLevel(0), previous_num_level0_files);
12214 12215

  // All key-values must exist after compaction fails.
I
Igor Canadi 已提交
12216
  for (int k = 0; k < kNumInsertedKeys; ++k) {
12217 12218 12219
    ASSERT_EQ(values[k], Get(keys[k]));
  }

12220
  env_->non_writable_count_ = 0;
12221

12222
  // Make sure RocksDB will not get into corrupted state.
L
Lei Jin 已提交
12223
  Reopen(options);
12224 12225

  // Verify again after reopen.
I
Igor Canadi 已提交
12226
  for (int k = 0; k < kNumInsertedKeys; ++k) {
12227 12228 12229 12230
    ASSERT_EQ(values[k], Get(keys[k]));
  }
}

I
Igor Sugak 已提交
12231
TEST_F(DBTest, DynamicMiscOptions) {
12232 12233 12234 12235 12236 12237 12238
  // Test max_sequential_skip_in_iterations
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.max_sequential_skip_in_iterations = 16;
  options.compression = kNoCompression;
  options.statistics = rocksdb::CreateDBStatistics();
L
Lei Jin 已提交
12239
  DestroyAndReopen(options);
12240 12241 12242 12243 12244 12245 12246 12247 12248 12249 12250 12251 12252 12253 12254 12255 12256 12257 12258 12259 12260 12261 12262 12263

  auto assert_reseek_count = [this, &options](int key_start, int num_reseek) {
    int key0 = key_start;
    int key1 = key_start + 1;
    int key2 = key_start + 2;
    Random rnd(301);
    ASSERT_OK(Put(Key(key0), RandomString(&rnd, 8)));
    for (int i = 0; i < 10; ++i) {
      ASSERT_OK(Put(Key(key1), RandomString(&rnd, 8)));
    }
    ASSERT_OK(Put(Key(key2), RandomString(&rnd, 8)));
    std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
    iter->Seek(Key(key1));
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Key(key1)), 0);
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Key(key2)), 0);
    ASSERT_EQ(num_reseek,
              TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
  };
  // No reseek
  assert_reseek_count(100, 0);

12264
  ASSERT_OK(dbfull()->SetOptions({
12265 12266 12267 12268 12269 12270 12271
    {"max_sequential_skip_in_iterations", "4"}
  }));
  // Clear memtable and make new option effective
  dbfull()->TEST_FlushMemTable(true);
  // Trigger reseek
  assert_reseek_count(200, 1);

12272
  ASSERT_OK(dbfull()->SetOptions({
12273 12274 12275 12276 12277 12278 12279 12280
    {"max_sequential_skip_in_iterations", "16"}
  }));
  // Clear memtable and make new option effective
  dbfull()->TEST_FlushMemTable(true);
  // No reseek
  assert_reseek_count(300, 1);
}

I
Igor Sugak 已提交
12281
TEST_F(DBTest, DontDeletePendingOutputs) {
I
Igor Canadi 已提交
12282 12283 12284 12285 12286 12287 12288 12289
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  DestroyAndReopen(options);

  // Every time we write to a table file, call FOF/POF with full DB scan. This
  // will make sure our pending_outputs_ protection work correctly
  std::function<void()> purge_obsolete_files_function = [&]() {
12290
    JobContext job_context(0);
I
Igor Canadi 已提交
12291 12292 12293 12294 12295 12296 12297 12298 12299 12300 12301 12302 12303 12304 12305 12306 12307 12308 12309 12310 12311
    dbfull()->TEST_LockMutex();
    dbfull()->FindObsoleteFiles(&job_context, true /*force*/);
    dbfull()->TEST_UnlockMutex();
    dbfull()->PurgeObsoleteFiles(job_context);
  };

  env_->table_write_callback_ = &purge_obsolete_files_function;

  for (int i = 0; i < 2; ++i) {
    ASSERT_OK(Put("a", "begin"));
    ASSERT_OK(Put("z", "end"));
    ASSERT_OK(Flush());
  }

  // If pending output guard does not work correctly, PurgeObsoleteFiles() will
  // delete the file that Compaction is trying to create, causing this: error
  // db/db_test.cc:975: IO error:
  // /tmp/rocksdbtest-1552237650/db_test/000009.sst: No such file or directory
  Compact("a", "b");
}

I
Igor Sugak 已提交
12312
TEST_F(DBTest, DontDeleteMovedFile) {
I
Igor Canadi 已提交
12313 12314 12315 12316 12317 12318 12319 12320 12321 12322 12323 12324 12325 12326 12327 12328 12329 12330 12331 12332 12333 12334 12335 12336 12337 12338 12339 12340 12341
  // This test triggers move compaction and verifies that the file is not
  // deleted when it's part of move compaction
  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.max_bytes_for_level_base = 1024 * 1024;  // 1 MB
  options.level0_file_num_compaction_trigger =
      2;  // trigger compaction when we have 2 files
  DestroyAndReopen(options);

  Random rnd(301);
  // Create two 1MB sst files
  for (int i = 0; i < 2; ++i) {
    // Create 1MB sst file
    for (int j = 0; j < 100; ++j) {
      ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024)));
    }
    ASSERT_OK(Flush());
  }
  // this should execute both L0->L1 and L1->(move)->L2 compactions
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ("0,0,1", FilesPerLevel(0));

  // If the moved file is actually deleted (the move-safeguard in
  // ~Version::Version() is not there), we get this failure:
  // Corruption: Can't access /000009.sst
  Reopen(options);
}

I
Igor Sugak 已提交
12342
TEST_F(DBTest, DeleteMovedFileAfterCompaction) {
I
Igor Canadi 已提交
12343 12344 12345 12346 12347 12348 12349 12350 12351 12352 12353 12354
  // iter 1 -- delete_obsolete_files_period_micros == 0
  for (int iter = 0; iter < 2; ++iter) {
    // This test triggers move compaction and verifies that the file is not
    // deleted when it's part of move compaction
    Options options = CurrentOptions();
    options.env = env_;
    if (iter == 1) {
      options.delete_obsolete_files_period_micros = 0;
    }
    options.create_if_missing = true;
    options.level0_file_num_compaction_trigger =
        2;  // trigger compaction when we have 2 files
12355 12356
    OnFileDeletionListener* listener = new OnFileDeletionListener();
    options.listeners.emplace_back(listener);
I
Igor Canadi 已提交
12357 12358 12359 12360 12361 12362 12363 12364 12365 12366 12367 12368 12369 12370 12371 12372 12373 12374 12375 12376 12377 12378 12379 12380 12381 12382 12383 12384 12385 12386 12387 12388 12389 12390 12391 12392 12393 12394 12395 12396 12397 12398 12399 12400 12401 12402 12403 12404 12405 12406 12407 12408
    DestroyAndReopen(options);

    Random rnd(301);
    // Create two 1MB sst files
    for (int i = 0; i < 2; ++i) {
      // Create 1MB sst file
      for (int j = 0; j < 100; ++j) {
        ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024)));
      }
      ASSERT_OK(Flush());
    }
    // this should execute L0->L1
    dbfull()->TEST_WaitForCompact();
    ASSERT_EQ("0,1", FilesPerLevel(0));

    // block compactions
    SleepingBackgroundTask sleeping_task;
    env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task,
                   Env::Priority::LOW);

    options.max_bytes_for_level_base = 1024 * 1024;  // 1 MB
    Reopen(options);
    std::unique_ptr<Iterator> iterator(db_->NewIterator(ReadOptions()));
    ASSERT_EQ("0,1", FilesPerLevel(0));
    // let compactions go
    sleeping_task.WakeUp();
    sleeping_task.WaitUntilDone();

    // this should execute L1->L2 (move)
    dbfull()->TEST_WaitForCompact();

    ASSERT_EQ("0,0,1", FilesPerLevel(0));

    std::vector<LiveFileMetaData> metadata;
    db_->GetLiveFilesMetaData(&metadata);
    ASSERT_EQ(metadata.size(), 1U);
    auto moved_file_name = metadata[0].name;

    // Create two more 1MB sst files
    for (int i = 0; i < 2; ++i) {
      // Create 1MB sst file
      for (int j = 0; j < 100; ++j) {
        ASSERT_OK(Put(Key(i * 50 + j + 100), RandomString(&rnd, 10 * 1024)));
      }
      ASSERT_OK(Flush());
    }
    // this should execute both L0->L1 and L1->L2 (merge with previous file)
    dbfull()->TEST_WaitForCompact();

    ASSERT_EQ("0,0,2", FilesPerLevel(0));

    // iterator is holding the file
12409
    ASSERT_TRUE(env_->FileExists(dbname_ + moved_file_name));
I
Igor Canadi 已提交
12410

12411
    listener->SetExpectedFileName(dbname_ + moved_file_name);
I
Igor Canadi 已提交
12412 12413 12414
    iterator.reset();

    // this file should have been compacted away
12415 12416
    ASSERT_TRUE(!env_->FileExists(dbname_ + moved_file_name));
    listener->VerifyMatchedCount(1);
I
Igor Canadi 已提交
12417 12418 12419
  }
}

I
Igor Sugak 已提交
12420
TEST_F(DBTest, OptimizeFiltersForHits) {
12421
  Options options = CurrentOptions();
12422 12423
  options.write_buffer_size = 256 * 1024;
  options.target_file_size_base = 256 * 1024;
12424 12425 12426
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 4;
12427
  options.max_bytes_for_level_base = 256 * 1024;
12428 12429 12430 12431 12432 12433 12434 12435 12436 12437 12438 12439 12440 12441 12442 12443 12444 12445 12446 12447 12448 12449 12450 12451 12452 12453 12454 12455 12456 12457 12458 12459 12460 12461 12462 12463 12464 12465 12466 12467 12468 12469 12470 12471
  options.max_write_buffer_number = 2;
  options.max_background_compactions = 8;
  options.max_background_flushes = 8;
  options.compaction_style = kCompactionStyleLevel;
  BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(NewBloomFilterPolicy(10, true));
  bbto.whole_key_filtering = true;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  options.optimize_filters_for_hits = true;
  options.statistics = rocksdb::CreateDBStatistics();
  CreateAndReopenWithCF({"mypikachu"}, options);

  int numkeys = 200000;
  for (int i = 0; i < 20; i += 2) {
    for (int j = i; j < numkeys; j += 20) {
      ASSERT_OK(Put(1, Key(j), "val"));
    }
  }


  ASSERT_OK(Flush(1));
  dbfull()->TEST_WaitForCompact();

  for (int i = 1; i < numkeys; i += 2) {
    ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND");
  }

  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));

  // When the skip_filters_on_last_level is ON, the last level which has
  // most of the keys does not use bloom filters. We end up using
  // bloom filters in a very small number of cases. Without the flag.
  // this number would be close to 150000 (all the key at the last level) +
  // some use in the upper levels
  //
  ASSERT_GT(90000, TestGetTickerCount(options, BLOOM_FILTER_USEFUL));

  for (int i = 0; i < numkeys; i += 2) {
    ASSERT_EQ(Get(1, Key(i)), "val");
  }
}

I
Igor Sugak 已提交
12472
TEST_F(DBTest, L0L1L2AndUpHitCounter) {
12473 12474 12475 12476 12477 12478 12479 12480 12481 12482 12483 12484 12485 12486 12487 12488 12489 12490 12491 12492 12493 12494 12495 12496 12497 12498 12499 12500 12501 12502 12503 12504 12505 12506 12507 12508 12509
  Options options = CurrentOptions();
  options.write_buffer_size = 32 * 1024;
  options.target_file_size_base = 32 * 1024;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 4;
  options.max_bytes_for_level_base = 64 * 1024;
  options.max_write_buffer_number = 2;
  options.max_background_compactions = 8;
  options.max_background_flushes = 8;
  options.statistics = rocksdb::CreateDBStatistics();
  CreateAndReopenWithCF({"mypikachu"}, options);

  int numkeys = 20000;
  for (int i = 0; i < numkeys; i++) {
    ASSERT_OK(Put(1, Key(i), "val"));
  }
  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));

  ASSERT_OK(Flush(1));
  dbfull()->TEST_WaitForCompact();

  for (int i = 0; i < numkeys; i++) {
    ASSERT_EQ(Get(1, Key(i)), "val");
  }

  ASSERT_GT(TestGetTickerCount(options, GET_HIT_L0), 100);
  ASSERT_GT(TestGetTickerCount(options, GET_HIT_L1), 100);
  ASSERT_GT(TestGetTickerCount(options, GET_HIT_L2_AND_UP), 100);

  ASSERT_EQ(numkeys, TestGetTickerCount(options, GET_HIT_L0) +
                         TestGetTickerCount(options, GET_HIT_L1) +
                         TestGetTickerCount(options, GET_HIT_L2_AND_UP));
}

I
Igor Sugak 已提交
12510
TEST_F(DBTest, EncodeDecompressedBlockSizeTest) {
12511 12512 12513 12514 12515 12516 12517 12518 12519 12520 12521 12522 12523 12524 12525 12526 12527 12528 12529 12530 12531 12532 12533 12534 12535 12536 12537 12538 12539 12540 12541 12542 12543 12544 12545 12546 12547 12548 12549 12550 12551
  // iter 0 -- zlib
  // iter 1 -- bzip2
  // iter 2 -- lz4
  // iter 3 -- lz4HC
  CompressionType compressions[] = {kZlibCompression, kBZip2Compression,
                                    kLZ4Compression,  kLZ4HCCompression};
  for (int iter = 0; iter < 4; ++iter) {
    // first_table_version 1 -- generate with table_version == 1, read with
    // table_version == 2
    // first_table_version 2 -- generate with table_version == 2, read with
    // table_version == 1
    for (int first_table_version = 1; first_table_version <= 2;
         ++first_table_version) {
      BlockBasedTableOptions table_options;
      table_options.format_version = first_table_version;
      table_options.filter_policy.reset(NewBloomFilterPolicy(10));
      Options options = CurrentOptions();
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
      options.create_if_missing = true;
      options.compression = compressions[iter];
      DestroyAndReopen(options);

      int kNumKeysWritten = 100000;

      Random rnd(301);
      for (int i = 0; i < kNumKeysWritten; ++i) {
        // compressible string
        ASSERT_OK(Put(Key(i), RandomString(&rnd, 128) + std::string(128, 'a')));
      }

      table_options.format_version = first_table_version == 1 ? 2 : 1;
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
      Reopen(options);
      for (int i = 0; i < kNumKeysWritten; ++i) {
        auto r = Get(Key(i));
        ASSERT_EQ(r.substr(128), std::string(128, 'a'));
      }
    }
  }
}

I
Igor Sugak 已提交
12552
TEST_F(DBTest, MutexWaitStats) {
12553 12554 12555 12556 12557 12558 12559 12560 12561 12562 12563 12564 12565 12566
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();
  CreateAndReopenWithCF({"pikachu"}, options);
  const int64_t kMutexWaitDelay = 100;
  ThreadStatusUtil::TEST_SetStateDelay(
      ThreadStatus::STATE_MUTEX_WAIT, kMutexWaitDelay);
  ASSERT_OK(Put("hello", "rocksdb"));
  ASSERT_GE(TestGetTickerCount(
            options, DB_MUTEX_WAIT_MICROS), kMutexWaitDelay);
  ThreadStatusUtil::TEST_SetStateDelay(
      ThreadStatus::STATE_MUTEX_WAIT, 0);
}

I
Igor Canadi 已提交
12567 12568 12569 12570 12571 12572 12573 12574 12575 12576 12577 12578 12579 12580
// This reproduces a bug where we don't delete a file because when it was
// supposed to be deleted, it was blocked by pending_outputs
// Consider:
// 1. current file_number is 13
// 2. compaction (1) starts, blocks deletion of all files starting with 13
// (pending outputs)
// 3. file 13 is created by compaction (2)
// 4. file 13 is consumed by compaction (3) and file 15 was created. Since file
// 13 has no references, it is put into VersionSet::obsolete_files_
// 5. FindObsoleteFiles() gets file 13 from VersionSet::obsolete_files_. File 13
// is deleted from obsolete_files_ set.
// 6. PurgeObsoleteFiles() tries to delete file 13, but this file is blocked by
// pending outputs since compaction (1) is still running. It is not deleted and
// it is not present in obsolete_files_ anymore. Therefore, we never delete it.
I
Igor Sugak 已提交
12581
TEST_F(DBTest, DeleteObsoleteFilesPendingOutputs) {
I
Igor Canadi 已提交
12582 12583 12584 12585 12586 12587 12588 12589
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 2 * 1024 * 1024;     // 2 MB
  options.max_bytes_for_level_base = 1024 * 1024;  // 1 MB
  options.level0_file_num_compaction_trigger =
      2;  // trigger compaction when we have 2 files
  options.max_background_flushes = 2;
  options.max_background_compactions = 2;
12590 12591 12592 12593

  OnFileDeletionListener* listener = new OnFileDeletionListener();
  options.listeners.emplace_back(listener);

I
Igor Canadi 已提交
12594 12595 12596 12597 12598 12599 12600 12601 12602 12603 12604 12605 12606 12607 12608 12609 12610 12611 12612 12613 12614 12615 12616 12617 12618 12619 12620 12621 12622 12623 12624 12625 12626 12627 12628 12629 12630 12631 12632 12633 12634 12635 12636 12637 12638 12639 12640 12641
  Reopen(options);

  Random rnd(301);
  // Create two 1MB sst files
  for (int i = 0; i < 2; ++i) {
    // Create 1MB sst file
    for (int j = 0; j < 100; ++j) {
      ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024)));
    }
    ASSERT_OK(Flush());
  }
  // this should execute both L0->L1 and L1->(move)->L2 compactions
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ("0,0,1", FilesPerLevel(0));

  SleepingBackgroundTask blocking_thread;
  port::Mutex mutex_;
  bool already_blocked(false);

  // block the flush
  std::function<void()> block_first_time = [&]() {
    bool blocking = false;
    {
      MutexLock l(&mutex_);
      if (!already_blocked) {
        blocking = true;
        already_blocked = true;
      }
    }
    if (blocking) {
      blocking_thread.DoSleep();
    }
  };
  env_->table_write_callback_ = &block_first_time;
  // Create 1MB sst file
  for (int j = 0; j < 256; ++j) {
    ASSERT_OK(Put(Key(j), RandomString(&rnd, 10 * 1024)));
  }
  // this should trigger a flush, which is blocked with block_first_time
  // pending_file is protecting all the files created after

  ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr));

  ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
  std::vector<LiveFileMetaData> metadata;
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(metadata.size(), 1U);
  auto file_on_L2 = metadata[0].name;
12642
  listener->SetExpectedFileName(dbname_ + file_on_L2);
I
Igor Canadi 已提交
12643

12644 12645
  ASSERT_OK(dbfull()->TEST_CompactRange(3, nullptr, nullptr, nullptr,
                                        true /* disallow trivial move */));
I
Igor Canadi 已提交
12646 12647 12648 12649 12650 12651 12652 12653 12654 12655 12656 12657
  ASSERT_EQ("0,0,0,0,1", FilesPerLevel(0));

  // finish the flush!
  blocking_thread.WakeUp();
  blocking_thread.WaitUntilDone();
  dbfull()->TEST_WaitForFlushMemTable();
  ASSERT_EQ("1,0,0,0,1", FilesPerLevel(0));

  metadata.clear();
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(metadata.size(), 2U);

12658
  // This file should have been deleted during last compaction
12659 12660
  ASSERT_TRUE(!env_->FileExists(dbname_ + file_on_L2));
  listener->VerifyMatchedCount(1);
I
Igor Canadi 已提交
12661 12662
}

I
Igor Sugak 已提交
12663
TEST_F(DBTest, CloseSpeedup) {
12664 12665 12666 12667 12668 12669 12670 12671 12672 12673 12674 12675 12676 12677 12678 12679 12680 12681 12682 12683 12684 12685 12686 12687 12688 12689 12690 12691 12692 12693 12694 12695 12696 12697 12698 12699 12700 12701 12702 12703 12704 12705 12706 12707 12708 12709 12710 12711 12712 12713 12714 12715
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 100 << 10;  // 100KB
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 4;
  options.max_bytes_for_level_base = 400 * 1024;
  options.max_write_buffer_number = 16;

  // Block background threads
  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
  SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  SleepingBackgroundTask sleeping_task_high;
  env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_high,
                 Env::Priority::HIGH);

  std::vector<std::string> filenames;
  env_->GetChildren(dbname_, &filenames);
  // Delete archival files.
  for (size_t i = 0; i < filenames.size(); ++i) {
    env_->DeleteFile(dbname_ + "/" + filenames[i]);
  }
  env_->DeleteDir(dbname_);
  DestroyAndReopen(options);

  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
  Random rnd(301);
  int key_idx = 0;

  // First three 110KB files are not going to level 2
  // After that, (100K, 200K)
  for (int num = 0; num < 5; num++) {
    GenerateNewFile(&rnd, &key_idx, true);
  }

  ASSERT_EQ(0, GetSstFileCount(dbname_));

  Close();
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  // Unblock background threads
  sleeping_task_high.WakeUp();
  sleeping_task_high.WaitUntilDone();
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();

  Destroy(options);
}
12716 12717 12718 12719 12720 12721 12722 12723 12724 12725 12726 12727 12728 12729 12730 12731 12732 12733 12734 12735 12736 12737 12738 12739 12740 12741 12742 12743 12744 12745 12746 12747 12748 12749 12750 12751 12752 12753 12754 12755 12756 12757 12758 12759 12760 12761

class DelayedMergeOperator : public AssociativeMergeOperator {
 private:
  DBTest* db_test_;

 public:
  explicit DelayedMergeOperator(DBTest* d) : db_test_(d) {}
  virtual bool Merge(const Slice& key, const Slice* existing_value,
                     const Slice& value, std::string* new_value,
                     Logger* logger) const override {
    db_test_->env_->addon_time_ += 1000;
    return true;
  }

  virtual const char* Name() const override { return "DelayedMergeOperator"; }
};

TEST_F(DBTest, MergeTestTime) {
  std::string one, two, three;
  PutFixed64(&one, 1);
  PutFixed64(&two, 2);
  PutFixed64(&three, 3);

  // Enable time profiling
  SetPerfLevel(kEnableTime);
  this->env_->addon_time_ = 0;
  Options options;
  options = CurrentOptions(options);
  options.statistics = rocksdb::CreateDBStatistics();
  options.merge_operator.reset(new DelayedMergeOperator(this));
  DestroyAndReopen(options);

  ASSERT_EQ(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0);
  db_->Put(WriteOptions(), "foo", one);
  ASSERT_OK(Flush());
  ASSERT_OK(db_->Merge(WriteOptions(), "foo", two));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->Merge(WriteOptions(), "foo", three));
  ASSERT_OK(Flush());

  ReadOptions opt;
  opt.verify_checksums = true;
  opt.snapshot = nullptr;
  std::string result;
  db_->Get(opt, "foo", &result);

S
sdong 已提交
12762 12763
  ASSERT_LT(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 2800000);
  ASSERT_GT(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 1200000);
12764 12765 12766 12767 12768 12769 12770 12771 12772 12773 12774

  ReadOptions read_options;
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  int count = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ASSERT_OK(iter->status());
    ++count;
  }

  ASSERT_EQ(1, count);

S
sdong 已提交
12775 12776
  ASSERT_LT(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 6000000);
  ASSERT_GT(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 3200000);
12777 12778 12779 12780 12781 12782 12783 12784 12785 12786 12787 12788 12789 12790 12791 12792 12793 12794 12795 12796 12797 12798 12799 12800 12801 12802 12803 12804 12805 12806 12807 12808 12809 12810 12811 12812 12813 12814 12815 12816 12817 12818 12819 12820 12821 12822 12823 12824 12825 12826 12827
}

TEST_F(DBTest, MergeCompactionTimeTest) {
  SetPerfLevel(kEnableTime);
  Options options;
  options = CurrentOptions(options);
  options.compaction_filter_factory = std::make_shared<KeepFilterFactory>();
  options.statistics = rocksdb::CreateDBStatistics();
  options.merge_operator.reset(new DelayedMergeOperator(this));
  options.compaction_style = kCompactionStyleUniversal;
  DestroyAndReopen(options);

  for (int i = 0; i < 1000; i++) {
    ASSERT_OK(db_->Merge(WriteOptions(), "foo", "TEST"));
    ASSERT_OK(Flush());
  }
  dbfull()->TEST_WaitForFlushMemTable();
  dbfull()->TEST_WaitForCompact();

  ASSERT_NE(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0);
}

TEST_F(DBTest, FilterCompactionTimeTest) {
  Options options;
  options.compaction_filter_factory =
      std::make_shared<DelayFilterFactory>(this);
  options.disable_auto_compactions = true;
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();
  options = CurrentOptions(options);
  DestroyAndReopen(options);

  // put some data
  for (int table = 0; table < 4; ++table) {
    for (int i = 0; i < 10 + table; ++i) {
      Put(ToString(table * 100 + i), "val");
    }
    Flush();
  }

  ASSERT_OK(db_->CompactRange(nullptr, nullptr));
  ASSERT_EQ(0U, CountLiveFiles());

  Reopen(options);

  Iterator* itr = db_->NewIterator(ReadOptions());
  itr->SeekToFirst();
  ASSERT_NE(TestGetTickerCount(options, FILTER_OPERATION_TOTAL_TIME), 0);
  delete itr;
}

12828 12829 12830 12831 12832 12833 12834 12835 12836 12837 12838 12839 12840 12841 12842
TEST_F(DBTest, TestLogCleanup) {
  Options options = CurrentOptions();
  options.write_buffer_size = 64 * 1024;  // very small
  // only two memtables allowed ==> only two log files
  options.max_write_buffer_number = 2;
  Reopen(options);

  for (int i = 0; i < 100000; ++i) {
    Put(Key(i), "val");
    // only 2 memtables will be alive, so logs_to_free needs to always be below
    // 2
    ASSERT_LT(dbfull()->TEST_LogsToFreeSize(), static_cast<size_t>(3));
  }
}

12843 12844 12845 12846 12847 12848 12849 12850 12851 12852 12853
TEST_F(DBTest, EmptyCompactedDB) {
  Options options;
  options.max_open_files = -1;
  options = CurrentOptions(options);
  Close();
  ASSERT_OK(ReadOnlyReopen(options));
  Status s = Put("new", "value");
  ASSERT_TRUE(s.IsNotSupported());
  Close();
}

12854 12855 12856 12857 12858 12859 12860 12861 12862 12863 12864 12865 12866 12867 12868 12869
TEST_F(DBTest, CompressLevelCompaction) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 100 << 10;  // 100KB
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 4;
  options.max_bytes_for_level_base = 400 * 1024;
  // First two levels have no compression, so that a trivial move between
  // them will be allowed. Level 2 has Zlib compression so that a trivial
  // move to level 3 will not be allowed
  options.compression_per_level = {kNoCompression, kNoCompression,
                                   kZlibCompression};
  int matches = 0, didnt_match = 0, trivial_move = 0, non_trivial = 0;

  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "Compaction::InputCompressionMatchesOutput:Matches",
12870
      [&](void* arg) { matches++; });
12871 12872
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "Compaction::InputCompressionMatchesOutput:DidntMatch",
12873
      [&](void* arg) { didnt_match++; });
12874
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
12875 12876
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* arg) { non_trivial++; });
12877
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
12878 12879
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* arg) { trivial_move++; });
12880 12881 12882 12883 12884 12885 12886 12887 12888 12889 12890 12891 12892 12893 12894 12895 12896 12897 12898 12899 12900 12901 12902 12903 12904 12905 12906 12907 12908 12909 12910 12911 12912 12913 12914 12915 12916 12917 12918 12919 12920 12921 12922 12923 12924 12925 12926 12927 12928 12929 12930 12931 12932 12933
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  Reopen(options);

  Random rnd(301);
  int key_idx = 0;

  // First three 110KB files are going to level 0
  // After that, (100K, 200K)
  for (int num = 0; num < 3; num++) {
    GenerateNewFile(&rnd, &key_idx);
  }

  // Another 110KB triggers a compaction to 400K file to fill up level 0
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(4, GetSstFileCount(dbname_));

  // (1, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4", FilesPerLevel(0));

  // (1, 4, 1)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,1", FilesPerLevel(0));

  // (1, 4, 2)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,2", FilesPerLevel(0));

  // (1, 4, 3)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,3", FilesPerLevel(0));

  // (1, 4, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,4", FilesPerLevel(0));

  // (1, 4, 5)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,5", FilesPerLevel(0));

  // (1, 4, 6)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,6", FilesPerLevel(0));

  // (1, 4, 7)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,7", FilesPerLevel(0));

  // (1, 4, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,8", FilesPerLevel(0));

  ASSERT_EQ(matches, 12);
12934 12935 12936 12937
  // Currently, the test relies on the number of calls to
  // InputCompressionMatchesOutput() per compaction.
  const int kCallsToInputCompressionMatch = 2;
  ASSERT_EQ(didnt_match, 8 * kCallsToInputCompressionMatch);
12938 12939 12940 12941 12942 12943 12944 12945 12946 12947 12948 12949 12950 12951 12952 12953 12954 12955 12956 12957 12958 12959
  ASSERT_EQ(trivial_move, 12);
  ASSERT_EQ(non_trivial, 8);

  rocksdb::SyncPoint::GetInstance()->DisableProcessing();

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 10000);
  }

  Reopen(options);

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 10000);
  }

  Destroy(options);
}

12960
TEST_F(DBTest, SuggestCompactRangeTest) {
12961 12962 12963
  class CompactionFilterFactoryGetContext : public CompactionFilterFactory {
   public:
    virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
I
Igor Canadi 已提交
12964
        const CompactionFilter::Context& context) override {
12965 12966 12967 12968 12969 12970 12971 12972 12973 12974 12975 12976 12977 12978
      saved_context = context;
      std::unique_ptr<CompactionFilter> empty_filter;
      return empty_filter;
    }
    const char* Name() const override {
      return "CompactionFilterFactoryGetContext";
    }
    static bool IsManual(CompactionFilterFactory* compaction_filter_factory) {
      return reinterpret_cast<CompactionFilterFactoryGetContext*>(
          compaction_filter_factory)->saved_context.is_manual_compaction;
    }
    CompactionFilter::Context saved_context;
  };

12979 12980
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
12981 12982 12983 12984
  options.compaction_filter_factory.reset(
      new CompactionFilterFactoryGetContext());
  options.write_buffer_size = 110 << 10;
  options.level0_file_num_compaction_trigger = 4;
12985
  options.num_levels = 4;
12986 12987 12988 12989
  options.compression = kNoCompression;
  options.max_bytes_for_level_base = 450 << 10;
  options.target_file_size_base = 98 << 10;
  options.max_grandparent_overlap_factor = 1 << 20;  // inf
12990 12991 12992 12993 12994 12995

  Reopen(options);

  Random rnd(301);

  for (int num = 0; num < 3; num++) {
12996
    GenerateNewRandomFile(&rnd);
12997 12998
  }

12999 13000 13001 13002
  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("0,4", FilesPerLevel(0));
  ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual(
                   options.compaction_filter_factory.get()));
13003

13004
  GenerateNewRandomFile(&rnd);
13005 13006
  ASSERT_EQ("1,4", FilesPerLevel(0));

13007 13008
  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("2,4", FilesPerLevel(0));
13009

13010 13011
  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("3,4", FilesPerLevel(0));
13012

13013 13014
  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("0,4,4", FilesPerLevel(0));
13015

13016
  GenerateNewRandomFile(&rnd);
13017 13018
  ASSERT_EQ("1,4,4", FilesPerLevel(0));

13019 13020
  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("2,4,4", FilesPerLevel(0));
13021

13022 13023
  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("3,4,4", FilesPerLevel(0));
13024

13025 13026
  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("0,4,8", FilesPerLevel(0));
13027

13028
  GenerateNewRandomFile(&rnd);
13029 13030 13031 13032 13033 13034 13035 13036 13037 13038
  ASSERT_EQ("1,4,8", FilesPerLevel(0));

  // compact it three times
  for (int i = 0; i < 3; ++i) {
    ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr));
    dbfull()->TEST_WaitForCompact();
  }

  ASSERT_EQ("0,0,13", FilesPerLevel(0));

13039
  GenerateNewRandomFile(&rnd);
13040 13041 13042 13043 13044 13045 13046 13047 13048 13049 13050 13051 13052 13053
  ASSERT_EQ("1,0,13", FilesPerLevel(0));

  // nonoverlapping with the file on level 0
  Slice start("a"), end("b");
  ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
  dbfull()->TEST_WaitForCompact();

  // should not compact the level 0 file
  ASSERT_EQ("1,0,13", FilesPerLevel(0));

  start = Slice("j");
  end = Slice("m");
  ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
  dbfull()->TEST_WaitForCompact();
13054 13055
  ASSERT_TRUE(CompactionFilterFactoryGetContext::IsManual(
      options.compaction_filter_factory.get()));
13056 13057 13058 13059 13060

  // now it should compact the level 0 file
  ASSERT_EQ("0,1,13", FilesPerLevel(0));
}

13061 13062 13063 13064 13065 13066 13067 13068 13069 13070 13071 13072 13073 13074 13075 13076 13077 13078 13079 13080 13081 13082 13083 13084 13085 13086 13087 13088 13089 13090 13091 13092 13093 13094 13095 13096 13097 13098 13099 13100 13101 13102 13103 13104 13105 13106 13107 13108 13109 13110 13111 13112 13113 13114 13115 13116 13117 13118 13119 13120 13121 13122 13123 13124 13125 13126
TEST_F(DBTest, PromoteL0) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.write_buffer_size = 10 * 1024 * 1024;
  DestroyAndReopen(options);

  // non overlapping ranges
  std::vector<std::pair<int32_t, int32_t>> ranges = {
      {81, 160}, {0, 80}, {161, 240}, {241, 320}};

  int32_t value_size = 10 * 1024;  // 10 KB

  Random rnd(301);
  std::map<int32_t, std::string> values;
  for (const auto& range : ranges) {
    for (int32_t j = range.first; j < range.second; j++) {
      values[j] = RandomString(&rnd, value_size);
      ASSERT_OK(Put(Key(j), values[j]));
    }
    ASSERT_OK(Flush());
  }

  int32_t level0_files = NumTableFilesAtLevel(0, 0);
  ASSERT_EQ(level0_files, ranges.size());
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0);  // No files in L1

  // Promote L0 level to L2.
  ASSERT_OK(experimental::PromoteL0(db_, db_->DefaultColumnFamily(), 2));
  // We expect that all the files were trivially moved from L0 to L2
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2, 0), level0_files);

  for (const auto& kv : values) {
    ASSERT_EQ(Get(Key(kv.first)), kv.second);
  }
}

TEST_F(DBTest, PromoteL0Failure) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.write_buffer_size = 10 * 1024 * 1024;
  DestroyAndReopen(options);

  // Produce two L0 files with overlapping ranges.
  ASSERT_OK(Put(Key(0), ""));
  ASSERT_OK(Put(Key(3), ""));
  ASSERT_OK(Flush());
  ASSERT_OK(Put(Key(1), ""));
  ASSERT_OK(Flush());

  Status status;
  // Fails because L0 has overlapping files.
  status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
  ASSERT_TRUE(status.IsInvalidArgument());

  ASSERT_OK(db_->CompactRange(nullptr, nullptr));
  // Now there is a file in L1.
  ASSERT_GE(NumTableFilesAtLevel(1, 0), 1);

  ASSERT_OK(Put(Key(5), ""));
  ASSERT_OK(Flush());
  // Fails because L1 is non-empty.
  status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
  ASSERT_TRUE(status.IsInvalidArgument());
}

13127 13128 13129 13130 13131 13132 13133 13134 13135 13136 13137 13138 13139 13140 13141 13142 13143 13144 13145
// Github issue #596
TEST_F(DBTest, HugeNumberOfLevels) {
  Options options = CurrentOptions();
  options.write_buffer_size = 2 * 1024 * 1024;         // 2MB
  options.max_bytes_for_level_base = 2 * 1024 * 1024;  // 2MB
  options.num_levels = 12;
  options.max_background_compactions = 10;
  options.max_bytes_for_level_multiplier = 2;
  options.level_compaction_dynamic_level_bytes = true;
  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < 300000; ++i) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
  }

  ASSERT_OK(db_->CompactRange(nullptr, nullptr));
}

13146 13147 13148 13149 13150 13151 13152 13153 13154 13155 13156 13157 13158 13159 13160 13161 13162 13163 13164 13165 13166 13167 13168 13169 13170 13171 13172 13173 13174 13175 13176 13177
// Github issue #595
// Large write batch with column families
TEST_F(DBTest, LargeBatchWithColumnFamilies) {
  Options options;
  options.env = env_;
  options = CurrentOptions(options);
  options.write_buffer_size = 100000;  // Small write buffer
  CreateAndReopenWithCF({"pikachu"}, options);
  int64_t j = 0;
  for (int i = 0; i < 5; i++) {
    for (int pass = 1; pass <= 3; pass++) {
      WriteBatch batch;
      size_t write_size = 1024 * 1024 * (5 + i);
      fprintf(stderr, "prepare: %ld MB, pass:%d\n", (write_size / 1024 / 1024),
              pass);
      for (;;) {
        std::string data(3000, j++ % 127 + 20);
        data += std::to_string(j);
        batch.Put(handles_[0], Slice(data), Slice(data));
        if (batch.GetDataSize() > write_size) {
          break;
        }
      }
      fprintf(stderr, "write: %ld MB\n", (batch.GetDataSize() / 1024 / 1024));
      ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
      fprintf(stderr, "done\n");
    }
  }
  // make sure we can re-open it.
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
}

13178 13179 13180 13181 13182 13183 13184 13185 13186 13187 13188 13189 13190 13191 13192 13193 13194 13195 13196 13197 13198 13199 13200 13201 13202 13203 13204 13205 13206 13207 13208 13209 13210 13211 13212 13213 13214 13215 13216 13217 13218 13219 13220 13221 13222 13223 13224 13225 13226 13227 13228 13229 13230 13231 13232 13233 13234 13235 13236 13237 13238 13239 13240 13241
// Make sure that Flushes can proceed in parallel with CompactRange()
TEST_F(DBTest, FlushesInParallelWithCompactRange) {
  // iter == 0 -- leveled
  // iter == 1 -- leveled, but throw in a flush between two levels compacting
  // iter == 2 -- universal
  for (int iter = 0; iter < 3; ++iter) {
    printf("iter %d\n", iter);
    Options options = CurrentOptions();
    if (iter < 2) {
      options.compaction_style = kCompactionStyleLevel;
    } else {
      options.compaction_style = kCompactionStyleUniversal;
    }
    options.write_buffer_size = 110 << 10;
    options.level0_file_num_compaction_trigger = 4;
    options.num_levels = 4;
    options.compression = kNoCompression;
    options.max_bytes_for_level_base = 450 << 10;
    options.target_file_size_base = 98 << 10;
    options.max_write_buffer_number = 2;

    DestroyAndReopen(options);

    Random rnd(301);
    for (int num = 0; num < 14; num++) {
      GenerateNewRandomFile(&rnd);
    }

    if (iter == 1) {
    rocksdb::SyncPoint::GetInstance()->LoadDependency(
        {{"DBImpl::RunManualCompaction()::1",
          "DBTest::FlushesInParallelWithCompactRange:1"},
         {"DBTest::FlushesInParallelWithCompactRange:2",
          "DBImpl::RunManualCompaction()::2"}});
    } else {
      rocksdb::SyncPoint::GetInstance()->LoadDependency(
          {{"CompactionJob::Run():Start",
            "DBTest::FlushesInParallelWithCompactRange:1"},
           {"DBTest::FlushesInParallelWithCompactRange:2",
            "CompactionJob::Run():End"}});
    }
    rocksdb::SyncPoint::GetInstance()->EnableProcessing();

    std::vector<std::thread> threads;
    threads.emplace_back([&]() { Compact("a", "z"); });

    TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:1");

    // this has to start a flush. if flushes are blocked, this will try to
    // create
    // 3 memtables, and that will fail because max_write_buffer_number is 2
    for (int num = 0; num < 3; num++) {
      GenerateNewRandomFile(&rnd, /* nowait */ true);
    }

    TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:2");

    for (auto& t : threads) {
      t.join();
    }
    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  }
}

13242
}  // namespace rocksdb
J
jorlow@chromium.org 已提交
13243 13244

int main(int argc, char** argv) {
13245
  rocksdb::port::InstallStackTraceHandler();
I
Igor Sugak 已提交
13246 13247
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
J
jorlow@chromium.org 已提交
13248
}