//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include <stdio.h>
11

K
Kai Liu 已提交
12
#include <algorithm>
13
#include <iostream>
J
jorlow@chromium.org 已提交
14
#include <map>
J
Jim Paton 已提交
15
#include <memory>
16
#include <string>
K
Kai Liu 已提交
17 18
#include <vector>

19
#include "block_fetcher.h"
M
Maysam Yabandeh 已提交
20
#include "cache/lru_cache.h"
J
jorlow@chromium.org 已提交
21 22 23
#include "db/dbformat.h"
#include "db/memtable.h"
#include "db/write_batch_internal.h"
24
#include "memtable/stl_wrappers.h"
25
#include "meta_blocks.h"
26
#include "monitoring/statistics.h"
D
Dmitri Smirnov 已提交
27
#include "port/port.h"
K
Kai Liu 已提交
28
#include "rocksdb/cache.h"
29 30 31 32
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/memtablerep.h"
I
Igor Canadi 已提交
33
#include "rocksdb/perf_context.h"
34 35
#include "rocksdb/slice_transform.h"
#include "rocksdb/statistics.h"
36
#include "rocksdb/write_buffer_manager.h"
37 38 39 40 41 42
#include "table/block_based/block.h"
#include "table/block_based/block_based_table_builder.h"
#include "table/block_based/block_based_table_factory.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/block_builder.h"
#include "table/block_based/flush_block_policy.h"
J
jorlow@chromium.org 已提交
43
#include "table/format.h"
44
#include "table/get_context.h"
S
sdong 已提交
45
#include "table/internal_iterator.h"
46
#include "table/plain/plain_table_factory.h"
S
sdong 已提交
47
#include "table/scoped_arena_iterator.h"
48
#include "table/sst_file_writer_collectors.h"
49 50 51
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
52 53 54
#include "util/compression.h"
#include "util/random.h"
#include "util/string_util.h"
55
#include "utilities/merge_operators.h"
56

57
namespace rocksdb {
J
jorlow@chromium.org 已提交
58

I
xxHash  
Igor Canadi 已提交
59 60 61 62 63
extern const uint64_t kLegacyBlockBasedTableMagicNumber;
extern const uint64_t kLegacyPlainTableMagicNumber;
extern const uint64_t kBlockBasedTableMagicNumber;
extern const uint64_t kPlainTableMagicNumber;

64
namespace {
K
Kai Liu 已提交
65

66 67 68
// DummyPropertiesCollector used to test BlockBasedTableProperties
class DummyPropertiesCollector : public TablePropertiesCollector {
 public:
69
  const char* Name() const override { return ""; }
70

71
  Status Finish(UserCollectedProperties* /*properties*/) override {
A
Andrew Kryczka 已提交
72 73
    return Status::OK();
  }
74

75
  Status Add(const Slice& /*user_key*/, const Slice& /*value*/) override {
A
Andrew Kryczka 已提交
76 77
    return Status::OK();
  }
78

79
  UserCollectedProperties GetReadableProperties() const override {
80 81 82 83 84 85 86
    return UserCollectedProperties{};
  }
};

class DummyPropertiesCollectorFactory1
    : public TablePropertiesCollectorFactory {
 public:
87 88
  TablePropertiesCollector* CreateTablePropertiesCollector(
      TablePropertiesCollectorFactory::Context /*context*/) override {
89 90
    return new DummyPropertiesCollector();
  }
91
  const char* Name() const override { return "DummyPropertiesCollector1"; }
92 93 94 95 96
};

class DummyPropertiesCollectorFactory2
    : public TablePropertiesCollectorFactory {
 public:
97 98
  TablePropertiesCollector* CreateTablePropertiesCollector(
      TablePropertiesCollectorFactory::Context /*context*/) override {
99 100
    return new DummyPropertiesCollector();
  }
101
  const char* Name() const override { return "DummyPropertiesCollector2"; }
102 103
};

J
jorlow@chromium.org 已提交
104 105
// Return reverse of "key".
// Used to test non-lexicographic comparators.
K
Kai Liu 已提交
106 107 108
std::string Reverse(const Slice& key) {
  auto rev = key.ToString();
  std::reverse(rev.begin(), rev.end());
J
jorlow@chromium.org 已提交
109 110 111 112 113
  return rev;
}

class ReverseKeyComparator : public Comparator {
 public:
114
  const char* Name() const override {
115
    return "rocksdb.ReverseBytewiseComparator";
J
jorlow@chromium.org 已提交
116 117
  }

118
  int Compare(const Slice& a, const Slice& b) const override {
J
jorlow@chromium.org 已提交
119 120 121
    return BytewiseComparator()->Compare(Reverse(a), Reverse(b));
  }

122 123
  void FindShortestSeparator(std::string* start,
                             const Slice& limit) const override {
J
jorlow@chromium.org 已提交
124 125 126 127 128 129
    std::string s = Reverse(*start);
    std::string l = Reverse(limit);
    BytewiseComparator()->FindShortestSeparator(&s, l);
    *start = Reverse(s);
  }

130
  void FindShortSuccessor(std::string* key) const override {
J
jorlow@chromium.org 已提交
131 132 133 134 135 136
    std::string s = Reverse(*key);
    BytewiseComparator()->FindShortSuccessor(&s);
    *key = Reverse(s);
  }
};

K
Kai Liu 已提交
137 138 139
ReverseKeyComparator reverse_key_comparator;

void Increment(const Comparator* cmp, std::string* key) {
J
jorlow@chromium.org 已提交
140 141 142 143 144 145 146 147 148 149
  if (cmp == BytewiseComparator()) {
    key->push_back('\0');
  } else {
    assert(cmp == &reverse_key_comparator);
    std::string rev = Reverse(*key);
    rev.push_back('\0');
    *key = Reverse(rev);
  }
}

H
Hans Wennborg 已提交
150
}  // namespace
J
jorlow@chromium.org 已提交
151 152 153 154 155

// Helper class for tests to unify the interface between
// BlockBuilder/TableBuilder and Block/Table.
class Constructor {
 public:
156 157
  explicit Constructor(const Comparator* cmp)
      : data_(stl_wrappers::LessOfComparator(cmp)) {}
J
jorlow@chromium.org 已提交
158 159 160 161 162 163 164 165 166
  virtual ~Constructor() { }

  void Add(const std::string& key, const Slice& value) {
    data_[key] = value.ToString();
  }

  // Finish constructing the data structure with all the keys that have
  // been added so far.  Returns the keys in sorted order in "*keys"
  // and stores the key/value pairs in "*kvmap"
167
  void Finish(const Options& options, const ImmutableCFOptions& ioptions,
168
              const MutableCFOptions& moptions,
169
              const BlockBasedTableOptions& table_options,
170
              const InternalKeyComparator& internal_comparator,
171
              std::vector<std::string>* keys, stl_wrappers::KVMap* kvmap) {
172
    last_internal_key_ = &internal_comparator;
J
jorlow@chromium.org 已提交
173 174
    *kvmap = data_;
    keys->clear();
175 176
    for (const auto& kv : data_) {
      keys->push_back(kv.first);
J
jorlow@chromium.org 已提交
177 178
    }
    data_.clear();
179
    Status s = FinishImpl(options, ioptions, moptions, table_options,
L
Lei Jin 已提交
180
                          internal_comparator, *kvmap);
J
jorlow@chromium.org 已提交
181 182 183 184
    ASSERT_TRUE(s.ok()) << s.ToString();
  }

  // Construct the data structure from the data in "data"
185
  virtual Status FinishImpl(const Options& options,
L
Lei Jin 已提交
186
                            const ImmutableCFOptions& ioptions,
187
                            const MutableCFOptions& moptions,
188
                            const BlockBasedTableOptions& table_options,
189
                            const InternalKeyComparator& internal_comparator,
190
                            const stl_wrappers::KVMap& data) = 0;
J
jorlow@chromium.org 已提交
191

192 193
  virtual InternalIterator* NewIterator(
      const SliceTransform* prefix_extractor = nullptr) const = 0;
J
jorlow@chromium.org 已提交
194

195
  virtual const stl_wrappers::KVMap& data() { return data_; }
J
jorlow@chromium.org 已提交
196

197 198
  virtual bool IsArenaMode() const { return false; }

A
Abhishek Kona 已提交
199
  virtual DB* db() const { return nullptr; }  // Overridden in DBConstructor
J
jorlow@chromium.org 已提交
200

201 202
  virtual bool AnywayDeleteIterator() const { return false; }

203 204 205
 protected:
  const InternalKeyComparator* last_internal_key_;

J
jorlow@chromium.org 已提交
206
 private:
207
  stl_wrappers::KVMap data_;
J
jorlow@chromium.org 已提交
208 209 210 211 212 213 214
};

class BlockConstructor: public Constructor {
 public:
  explicit BlockConstructor(const Comparator* cmp)
      : Constructor(cmp),
        comparator_(cmp),
A
Abhishek Kona 已提交
215
        block_(nullptr) { }
216 217 218 219 220 221 222
  ~BlockConstructor() override { delete block_; }
  Status FinishImpl(const Options& /*options*/,
                    const ImmutableCFOptions& /*ioptions*/,
                    const MutableCFOptions& /*moptions*/,
                    const BlockBasedTableOptions& table_options,
                    const InternalKeyComparator& /*internal_comparator*/,
                    const stl_wrappers::KVMap& kv_map) override {
J
jorlow@chromium.org 已提交
223
    delete block_;
A
Abhishek Kona 已提交
224
    block_ = nullptr;
I
Igor Canadi 已提交
225
    BlockBuilder builder(table_options.block_restart_interval);
J
jorlow@chromium.org 已提交
226

I
Igor Canadi 已提交
227 228
    for (const auto kv : kv_map) {
      builder.Add(kv.first, kv.second);
J
jorlow@chromium.org 已提交
229 230
    }
    // Open the block
S
Sanjay Ghemawat 已提交
231 232 233
    data_ = builder.Finish().ToString();
    BlockContents contents;
    contents.data = data_;
234
    block_ = new Block(std::move(contents), kDisableGlobalSequenceNumber);
J
jorlow@chromium.org 已提交
235 236
    return Status::OK();
  }
237
  InternalIterator* NewIterator(
238
      const SliceTransform* /*prefix_extractor*/) const override {
239
    return block_->NewDataIterator(comparator_, comparator_);
J
jorlow@chromium.org 已提交
240 241 242 243
  }

 private:
  const Comparator* comparator_;
S
Sanjay Ghemawat 已提交
244
  std::string data_;
J
jorlow@chromium.org 已提交
245 246 247 248 249
  Block* block_;

  BlockConstructor();
};

250
// A helper class that converts internal format keys into user keys
S
sdong 已提交
251
class KeyConvertingIterator : public InternalIterator {
J
jorlow@chromium.org 已提交
252
 public:
S
sdong 已提交
253 254
  explicit KeyConvertingIterator(InternalIterator* iter,
                                 bool arena_mode = false)
255
      : iter_(iter), arena_mode_(arena_mode) {}
256
  ~KeyConvertingIterator() override {
257
    if (arena_mode_) {
S
sdong 已提交
258
      iter_->~InternalIterator();
259 260 261 262
    } else {
      delete iter_;
    }
  }
263 264
  bool Valid() const override { return iter_->Valid() && status_.ok(); }
  void Seek(const Slice& target) override {
265 266 267 268 269
    ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
    std::string encoded;
    AppendInternalKey(&encoded, ikey);
    iter_->Seek(encoded);
  }
270
  void SeekForPrev(const Slice& target) override {
A
Aaron Gao 已提交
271 272 273 274 275
    ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
    std::string encoded;
    AppendInternalKey(&encoded, ikey);
    iter_->SeekForPrev(encoded);
  }
276 277 278 279
  void SeekToFirst() override { iter_->SeekToFirst(); }
  void SeekToLast() override { iter_->SeekToLast(); }
  void Next() override { iter_->Next(); }
  void Prev() override { iter_->Prev(); }
280
  bool IsOutOfBound() override { return iter_->IsOutOfBound(); }
281

282
  Slice key() const override {
283
    assert(Valid());
I
Igor Canadi 已提交
284 285
    ParsedInternalKey parsed_key;
    if (!ParseInternalKey(iter_->key(), &parsed_key)) {
286 287 288
      status_ = Status::Corruption("malformed internal key");
      return Slice("corrupted key");
    }
I
Igor Canadi 已提交
289
    return parsed_key.user_key;
J
jorlow@chromium.org 已提交
290
  }
291

292 293
  Slice value() const override { return iter_->value(); }
  Status status() const override {
294 295 296 297 298
    return status_.ok() ? iter_->status() : status_;
  }

 private:
  mutable Status status_;
S
sdong 已提交
299
  InternalIterator* iter_;
300
  bool arena_mode_;
301 302 303 304 305 306 307 308

  // No copying allowed
  KeyConvertingIterator(const KeyConvertingIterator&);
  void operator=(const KeyConvertingIterator&);
};

class TableConstructor: public Constructor {
 public:
K
kailiu 已提交
309
  explicit TableConstructor(const Comparator* cmp,
310
                            bool convert_to_internal_key = false,
311
                            int level = -1, SequenceNumber largest_seqno = 0)
312
      : Constructor(cmp),
313
        largest_seqno_(largest_seqno),
314 315
        convert_to_internal_key_(convert_to_internal_key),
        level_(level) {}
316
  ~TableConstructor() override { Reset(); }
317

318 319 320 321 322
  Status FinishImpl(const Options& options, const ImmutableCFOptions& ioptions,
                    const MutableCFOptions& moptions,
                    const BlockBasedTableOptions& /*table_options*/,
                    const InternalKeyComparator& internal_comparator,
                    const stl_wrappers::KVMap& kv_map) override {
J
jorlow@chromium.org 已提交
323
    Reset();
324
    soptions.use_mmap_reads = ioptions.allow_mmap_reads;
325 326
    file_writer_.reset(test::GetWritableFileWriter(new test::StringSink(),
                                                   "" /* don't care */));
327
    std::unique_ptr<TableBuilder> builder;
328 329
    std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
        int_tbl_prop_collector_factories;
330 331 332 333 334 335 336 337

    if (largest_seqno_ != 0) {
      // Pretend that it's an external file written by SstFileWriter.
      int_tbl_prop_collector_factories.emplace_back(
          new SstFileWriterPropertiesCollectorFactory(2 /* version */,
                                                      0 /* global_seqno*/));
    }

338
    std::string column_family_name;
L
Lei Jin 已提交
339
    builder.reset(ioptions.table_factory->NewTableBuilder(
340 341
        TableBuilderOptions(ioptions, moptions, internal_comparator,
                            &int_tbl_prop_collector_factories,
342
                            options.compression, options.sample_for_compression,
343
                            options.compression_opts, false /* skip_filters */,
344
                            column_family_name, level_),
345
        TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
346
        file_writer_.get()));
J
jorlow@chromium.org 已提交
347

I
Igor Canadi 已提交
348
    for (const auto kv : kv_map) {
349
      if (convert_to_internal_key_) {
I
Igor Canadi 已提交
350
        ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
351 352
        std::string encoded;
        AppendInternalKey(&encoded, ikey);
I
Igor Canadi 已提交
353
        builder->Add(encoded, kv.second);
354
      } else {
I
Igor Canadi 已提交
355
        builder->Add(kv.first, kv.second);
356
      }
357
      EXPECT_TRUE(builder->status().ok());
J
jorlow@chromium.org 已提交
358
    }
359
    Status s = builder->Finish();
360
    file_writer_->Flush();
361
    EXPECT_TRUE(s.ok()) << s.ToString();
J
jorlow@chromium.org 已提交
362

363
    EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize());
J
jorlow@chromium.org 已提交
364 365

    // Open the table
366
    uniq_id_ = cur_uniq_id_++;
A
Andres Notzli 已提交
367
    file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource(
368
        TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
369 370
    const bool kSkipFilters = true;
    const bool kImmortal = true;
L
Lei Jin 已提交
371
    return ioptions.table_factory->NewTableReader(
372
        TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
373
                           internal_comparator, !kSkipFilters, !kImmortal,
374
                           level_, largest_seqno_, nullptr),
375 376
        std::move(file_reader_), TEST_GetSink()->contents().size(),
        &table_reader_);
J
jorlow@chromium.org 已提交
377 378
  }

379
  InternalIterator* NewIterator(
380
      const SliceTransform* prefix_extractor) const override {
381
    ReadOptions ro;
382 383 384
    InternalIterator* iter = table_reader_->NewIterator(
        ro, prefix_extractor, /*arena=*/nullptr, /*skip_filters=*/false,
        TableReaderCaller::kUncategorized);
385 386 387 388 389
    if (convert_to_internal_key_) {
      return new KeyConvertingIterator(iter);
    } else {
      return iter;
    }
J
jorlow@chromium.org 已提交
390 391 392
  }

  uint64_t ApproximateOffsetOf(const Slice& key) const {
393 394 395
    if (convert_to_internal_key_) {
      InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
      const Slice skey = ikey.Encode();
396 397
      return table_reader_->ApproximateOffsetOf(
          skey, TableReaderCaller::kUncategorized);
398
    }
399 400
    return table_reader_->ApproximateOffsetOf(
        key, TableReaderCaller::kUncategorized);
J
jorlow@chromium.org 已提交
401 402
  }

403 404
  virtual Status Reopen(const ImmutableCFOptions& ioptions,
                        const MutableCFOptions& moptions) {
A
Andres Notzli 已提交
405
    file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource(
406
        TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
L
Lei Jin 已提交
407
    return ioptions.table_factory->NewTableReader(
408 409
        TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
                           *last_internal_key_),
410 411
        std::move(file_reader_), TEST_GetSink()->contents().size(),
        &table_reader_);
412 413
  }

414
  virtual TableReader* GetTableReader() { return table_reader_.get(); }
415

416
  bool AnywayDeleteIterator() const override {
417 418 419
    return convert_to_internal_key_;
  }

420 421
  void ResetTableReader() { table_reader_.reset(); }

422 423
  bool ConvertToInternalKey() { return convert_to_internal_key_; }

424 425 426 427
  test::StringSink* TEST_GetSink() {
    return static_cast<test::StringSink*>(file_writer_->writable_file());
  }

J
jorlow@chromium.org 已提交
428 429
 private:
  void Reset() {
430
    uniq_id_ = 0;
S
Siying Dong 已提交
431
    table_reader_.reset();
432 433 434 435
    file_writer_.reset();
    file_reader_.reset();
  }

436
  uint64_t uniq_id_;
437 438 439
  std::unique_ptr<WritableFileWriter> file_writer_;
  std::unique_ptr<RandomAccessFileReader> file_reader_;
  std::unique_ptr<TableReader> table_reader_;
440
  SequenceNumber largest_seqno_;
441
  bool convert_to_internal_key_;
442
  int level_;
J
jorlow@chromium.org 已提交
443

444
  TableConstructor();
445 446

  static uint64_t cur_uniq_id_;
447
  EnvOptions soptions;
J
jorlow@chromium.org 已提交
448
};
449
uint64_t TableConstructor::cur_uniq_id_ = 1;
J
jorlow@chromium.org 已提交
450 451 452

class MemTableConstructor: public Constructor {
 public:
453
  explicit MemTableConstructor(const Comparator* cmp, WriteBufferManager* wb)
J
jorlow@chromium.org 已提交
454
      : Constructor(cmp),
J
Jim Paton 已提交
455
        internal_comparator_(cmp),
456
        write_buffer_manager_(wb),
J
Jim Paton 已提交
457
        table_factory_(new SkipListFactory) {
458 459
    options_.memtable_factory = table_factory_;
    ImmutableCFOptions ioptions(options_);
Y
Yi Wu 已提交
460 461
    memtable_ =
        new MemTable(internal_comparator_, ioptions, MutableCFOptions(options_),
462
                     wb, kMaxSequenceNumber, 0 /* column_family_id */);
463
    memtable_->Ref();
J
jorlow@chromium.org 已提交
464
  }
465 466 467 468 469 470
  ~MemTableConstructor() override { delete memtable_->Unref(); }
  Status FinishImpl(const Options&, const ImmutableCFOptions& ioptions,
                    const MutableCFOptions& /*moptions*/,
                    const BlockBasedTableOptions& /*table_options*/,
                    const InternalKeyComparator& /*internal_comparator*/,
                    const stl_wrappers::KVMap& kv_map) override {
471
    delete memtable_->Unref();
472
    ImmutableCFOptions mem_ioptions(ioptions);
473
    memtable_ = new MemTable(internal_comparator_, mem_ioptions,
Y
Yi Wu 已提交
474
                             MutableCFOptions(options_), write_buffer_manager_,
475
                             kMaxSequenceNumber, 0 /* column_family_id */);
476
    memtable_->Ref();
J
jorlow@chromium.org 已提交
477
    int seq = 1;
I
Igor Canadi 已提交
478 479
    for (const auto kv : kv_map) {
      memtable_->Add(seq, kTypeValue, kv.first, kv.second);
J
jorlow@chromium.org 已提交
480 481 482 483
      seq++;
    }
    return Status::OK();
  }
484
  InternalIterator* NewIterator(
485
      const SliceTransform* /*prefix_extractor*/) const override {
486 487
    return new KeyConvertingIterator(
        memtable_->NewIterator(ReadOptions(), &arena_), true);
J
jorlow@chromium.org 已提交
488 489
  }

490
  bool AnywayDeleteIterator() const override { return true; }
491

492
  bool IsArenaMode() const override { return true; }
493

J
jorlow@chromium.org 已提交
494
 private:
495
  mutable Arena arena_;
J
jorlow@chromium.org 已提交
496
  InternalKeyComparator internal_comparator_;
497
  Options options_;
498
  WriteBufferManager* write_buffer_manager_;
J
jorlow@chromium.org 已提交
499
  MemTable* memtable_;
J
Jim Paton 已提交
500
  std::shared_ptr<SkipListFactory> table_factory_;
J
jorlow@chromium.org 已提交
501 502
};

S
sdong 已提交
503 504 505
class InternalIteratorFromIterator : public InternalIterator {
 public:
  explicit InternalIteratorFromIterator(Iterator* it) : it_(it) {}
506 507 508 509 510 511 512
  bool Valid() const override { return it_->Valid(); }
  void Seek(const Slice& target) override { it_->Seek(target); }
  void SeekForPrev(const Slice& target) override { it_->SeekForPrev(target); }
  void SeekToFirst() override { it_->SeekToFirst(); }
  void SeekToLast() override { it_->SeekToLast(); }
  void Next() override { it_->Next(); }
  void Prev() override { it_->Prev(); }
S
sdong 已提交
513 514
  Slice key() const override { return it_->key(); }
  Slice value() const override { return it_->value(); }
515
  Status status() const override { return it_->status(); }
S
sdong 已提交
516 517

 private:
518
  std::unique_ptr<Iterator> it_;
S
sdong 已提交
519 520
};

J
jorlow@chromium.org 已提交
521 522 523 524 525
class DBConstructor: public Constructor {
 public:
  explicit DBConstructor(const Comparator* cmp)
      : Constructor(cmp),
        comparator_(cmp) {
A
Abhishek Kona 已提交
526
    db_ = nullptr;
J
jorlow@chromium.org 已提交
527 528
    NewDB();
  }
529 530 531 532 533 534 535
  ~DBConstructor() override { delete db_; }
  Status FinishImpl(const Options& /*options*/,
                    const ImmutableCFOptions& /*ioptions*/,
                    const MutableCFOptions& /*moptions*/,
                    const BlockBasedTableOptions& /*table_options*/,
                    const InternalKeyComparator& /*internal_comparator*/,
                    const stl_wrappers::KVMap& kv_map) override {
J
jorlow@chromium.org 已提交
536
    delete db_;
A
Abhishek Kona 已提交
537
    db_ = nullptr;
J
jorlow@chromium.org 已提交
538
    NewDB();
I
Igor Canadi 已提交
539
    for (const auto kv : kv_map) {
J
jorlow@chromium.org 已提交
540
      WriteBatch batch;
I
Igor Canadi 已提交
541
      batch.Put(kv.first, kv.second);
542
      EXPECT_TRUE(db_->Write(WriteOptions(), &batch).ok());
J
jorlow@chromium.org 已提交
543 544 545
    }
    return Status::OK();
  }
S
sdong 已提交
546

547
  InternalIterator* NewIterator(
548
      const SliceTransform* /*prefix_extractor*/) const override {
S
sdong 已提交
549
    return new InternalIteratorFromIterator(db_->NewIterator(ReadOptions()));
J
jorlow@chromium.org 已提交
550 551
  }

552
  DB* db() const override { return db_; }
J
jorlow@chromium.org 已提交
553

J
jorlow@chromium.org 已提交
554 555
 private:
  void NewDB() {
556
    std::string name = test::PerThreadDBPath("table_testdb");
J
jorlow@chromium.org 已提交
557

558
    Options options;
J
jorlow@chromium.org 已提交
559 560 561 562 563 564
    options.comparator = comparator_;
    Status status = DestroyDB(name, options);
    ASSERT_TRUE(status.ok()) << status.ToString();

    options.create_if_missing = true;
    options.error_if_exists = true;
J
jorlow@chromium.org 已提交
565
    options.write_buffer_size = 10000;  // Something small to force merging
J
jorlow@chromium.org 已提交
566 567 568 569 570 571 572 573 574
    status = DB::Open(options, name, &db_);
    ASSERT_TRUE(status.ok()) << status.ToString();
  }

  const Comparator* comparator_;
  DB* db_;
};

// The kinds of structure the harness tests can be run against. The plain
// table variants are compiled out under ROCKSDB_LITE.
enum TestType {
  // Table built through BlockBasedTableFactory.
  BLOCK_BASED_TABLE_TEST,
#ifndef ROCKSDB_LITE
  // Plain table variants.
  PLAIN_TABLE_SEMI_FIXED_PREFIX,
  PLAIN_TABLE_FULL_STR_PREFIX,
  PLAIN_TABLE_TOTAL_ORDER,
#endif  // !ROCKSDB_LITE
  // A single block.
  BLOCK_TEST,
  // A memtable.
  MEMTABLE_TEST,
  // A whole DB.
  DB_TEST
};

struct TestArgs {
  TestType type;
  bool reverse_compare;
  int restart_interval;
H
heyongqiang 已提交
590
  CompressionType compression;
591
  uint32_t format_version;
592
  bool use_mmap;
J
jorlow@chromium.org 已提交
593 594
};

595
static std::vector<TestArgs> GenerateArgList() {
K
Kai Liu 已提交
596 597
  std::vector<TestArgs> test_args;
  std::vector<TestType> test_types = {
598 599 600 601 602 603 604 605
      BLOCK_BASED_TABLE_TEST,
#ifndef ROCKSDB_LITE
      PLAIN_TABLE_SEMI_FIXED_PREFIX,
      PLAIN_TABLE_FULL_STR_PREFIX,
      PLAIN_TABLE_TOTAL_ORDER,
#endif  // !ROCKSDB_LITE
      BLOCK_TEST,
      MEMTABLE_TEST, DB_TEST};
K
Kai Liu 已提交
606 607
  std::vector<bool> reverse_compare_types = {false, true};
  std::vector<int> restart_intervals = {16, 1, 1024};
H
heyongqiang 已提交
608 609

  // Only add compression if it is supported
610 611
  std::vector<std::pair<CompressionType, bool>> compression_types;
  compression_types.emplace_back(kNoCompression, false);
I
Igor Canadi 已提交
612
  if (Snappy_Supported()) {
613
    compression_types.emplace_back(kSnappyCompression, false);
K
Kai Liu 已提交
614
  }
I
Igor Canadi 已提交
615
  if (Zlib_Supported()) {
616 617
    compression_types.emplace_back(kZlibCompression, false);
    compression_types.emplace_back(kZlibCompression, true);
K
Kai Liu 已提交
618
  }
I
Igor Canadi 已提交
619
  if (BZip2_Supported()) {
620 621
    compression_types.emplace_back(kBZip2Compression, false);
    compression_types.emplace_back(kBZip2Compression, true);
K
Kai Liu 已提交
622
  }
I
Igor Canadi 已提交
623
  if (LZ4_Supported()) {
624 625 626 627
    compression_types.emplace_back(kLZ4Compression, false);
    compression_types.emplace_back(kLZ4Compression, true);
    compression_types.emplace_back(kLZ4HCCompression, false);
    compression_types.emplace_back(kLZ4HCCompression, true);
A
Albert Strasheim 已提交
628
  }
629 630 631 632
  if (XPRESS_Supported()) {
    compression_types.emplace_back(kXpressCompression, false);
    compression_types.emplace_back(kXpressCompression, true);
  }
633
  if (ZSTD_Supported()) {
S
sdong 已提交
634 635
    compression_types.emplace_back(kZSTD, false);
    compression_types.emplace_back(kZSTD, true);
636
  }
H
heyongqiang 已提交
637

K
Kai Liu 已提交
638 639
  for (auto test_type : test_types) {
    for (auto reverse_compare : reverse_compare_types) {
640
#ifndef ROCKSDB_LITE
K
Kai Liu 已提交
641
      if (test_type == PLAIN_TABLE_SEMI_FIXED_PREFIX ||
642 643
          test_type == PLAIN_TABLE_FULL_STR_PREFIX ||
          test_type == PLAIN_TABLE_TOTAL_ORDER) {
644 645
        // Plain table doesn't use restart index or compression.
        TestArgs one_arg;
K
Kai Liu 已提交
646 647 648
        one_arg.type = test_type;
        one_arg.reverse_compare = reverse_compare;
        one_arg.restart_interval = restart_intervals[0];
649
        one_arg.compression = compression_types[0].first;
650 651 652
        one_arg.use_mmap = true;
        test_args.push_back(one_arg);
        one_arg.use_mmap = false;
K
Kai Liu 已提交
653
        test_args.push_back(one_arg);
654 655
        continue;
      }
656
#endif  // !ROCKSDB_LITE
H
heyongqiang 已提交
657

K
Kai Liu 已提交
658 659
      for (auto restart_interval : restart_intervals) {
        for (auto compression_type : compression_types) {
660
          TestArgs one_arg;
K
Kai Liu 已提交
661 662 663
          one_arg.type = test_type;
          one_arg.reverse_compare = reverse_compare;
          one_arg.restart_interval = restart_interval;
664 665
          one_arg.compression = compression_type.first;
          one_arg.format_version = compression_type.second ? 2 : 1;
666
          one_arg.use_mmap = false;
K
Kai Liu 已提交
667
          test_args.push_back(one_arg);
668
        }
K
Kai Liu 已提交
669
      }
670
    }
K
Kai Liu 已提交
671 672
  }
  return test_args;
H
heyongqiang 已提交
673
}
J
jorlow@chromium.org 已提交
674

675 676 677 678 679 680 681 682 683 684 685 686 687
// In order to make all tests run for plain table format, including
// those operating on empty keys, create a new prefix transformer which
// return fixed prefix if the slice is not shorter than the prefix length,
// and the full slice if it is shorter.
class FixedOrLessPrefixTransform : public SliceTransform {
 private:
  const size_t prefix_len_;

 public:
  explicit FixedOrLessPrefixTransform(size_t prefix_len) :
      prefix_len_(prefix_len) {
  }

688
  const char* Name() const override { return "rocksdb.FixedPrefix"; }
689

690
  Slice Transform(const Slice& src) const override {
691 692 693 694 695 696 697
    assert(InDomain(src));
    if (src.size() < prefix_len_) {
      return src;
    }
    return Slice(src.data(), prefix_len_);
  }

698
  bool InDomain(const Slice& /*src*/) const override { return true; }
699

700
  bool InRange(const Slice& dst) const override {
701 702
    return (dst.size() <= prefix_len_);
  }
703
  bool FullLengthEnabled(size_t* /*len*/) const override { return false; }
704 705
};

I
Igor Sugak 已提交
706
class HarnessTest : public testing::Test {
J
jorlow@chromium.org 已提交
707
 public:
708
  HarnessTest()
709
      : ioptions_(options_),
710
        moptions_(options_),
711 712
        constructor_(nullptr),
        write_buffer_(options_.db_write_buffer_size) {}
J
jorlow@chromium.org 已提交
713 714 715

  void Init(const TestArgs& args) {
    delete constructor_;
A
Abhishek Kona 已提交
716
    constructor_ = nullptr;
717
    options_ = Options();
H
heyongqiang 已提交
718
    options_.compression = args.compression;
J
jorlow@chromium.org 已提交
719 720 721 722 723
    // Use shorter block size for tests to exercise block boundary
    // conditions more.
    if (args.reverse_compare) {
      options_.comparator = &reverse_key_comparator;
    }
724 725 726 727

    internal_comparator_.reset(
        new test::PlainInternalKeyComparator(options_.comparator));

728 729
    support_prev_ = true;
    only_support_prefix_seek_ = false;
730
    options_.allow_mmap_reads = args.use_mmap;
J
jorlow@chromium.org 已提交
731
    switch (args.type) {
732
      case BLOCK_BASED_TABLE_TEST:
733
        table_options_.flush_block_policy_factory.reset(
734
            new FlushBlockBySizePolicyFactory());
735 736
        table_options_.block_size = 256;
        table_options_.block_restart_interval = args.restart_interval;
737
        table_options_.index_block_restart_interval = args.restart_interval;
738
        table_options_.format_version = args.format_version;
739 740
        options_.table_factory.reset(
            new BlockBasedTableFactory(table_options_));
741 742 743 744
        constructor_ = new TableConstructor(
            options_.comparator, true /* convert_to_internal_key_ */);
        internal_comparator_.reset(
            new InternalKeyComparator(options_.comparator));
745
        break;
746 747
// Plain table is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
748 749 750
      case PLAIN_TABLE_SEMI_FIXED_PREFIX:
        support_prev_ = false;
        only_support_prefix_seek_ = true;
751
        options_.prefix_extractor.reset(new FixedOrLessPrefixTransform(2));
752
        options_.table_factory.reset(NewPlainTableFactory());
753 754
        constructor_ = new TableConstructor(
            options_.comparator, true /* convert_to_internal_key_ */);
755 756
        internal_comparator_.reset(
            new InternalKeyComparator(options_.comparator));
757 758 759 760
        break;
      case PLAIN_TABLE_FULL_STR_PREFIX:
        support_prev_ = false;
        only_support_prefix_seek_ = true;
761
        options_.prefix_extractor.reset(NewNoopTransform());
762
        options_.table_factory.reset(NewPlainTableFactory());
763 764
        constructor_ = new TableConstructor(
            options_.comparator, true /* convert_to_internal_key_ */);
765 766 767 768 769 770 771
        internal_comparator_.reset(
            new InternalKeyComparator(options_.comparator));
        break;
      case PLAIN_TABLE_TOTAL_ORDER:
        support_prev_ = false;
        only_support_prefix_seek_ = false;
        options_.prefix_extractor = nullptr;
S
Stanislau Hlebik 已提交
772 773 774 775 776 777 778 779 780 781

        {
          PlainTableOptions plain_table_options;
          plain_table_options.user_key_len = kPlainTableVariableLength;
          plain_table_options.bloom_bits_per_key = 0;
          plain_table_options.hash_table_ratio = 0;

          options_.table_factory.reset(
              NewPlainTableFactory(plain_table_options));
        }
782 783
        constructor_ = new TableConstructor(
            options_.comparator, true /* convert_to_internal_key_ */);
784 785
        internal_comparator_.reset(
            new InternalKeyComparator(options_.comparator));
J
jorlow@chromium.org 已提交
786
        break;
787
#endif  // !ROCKSDB_LITE
J
jorlow@chromium.org 已提交
788
      case BLOCK_TEST:
789 790 791
        table_options_.block_size = 256;
        options_.table_factory.reset(
            new BlockBasedTableFactory(table_options_));
J
jorlow@chromium.org 已提交
792 793 794
        constructor_ = new BlockConstructor(options_.comparator);
        break;
      case MEMTABLE_TEST:
795 796 797
        table_options_.block_size = 256;
        options_.table_factory.reset(
            new BlockBasedTableFactory(table_options_));
798 799
        constructor_ = new MemTableConstructor(options_.comparator,
                                               &write_buffer_);
J
jorlow@chromium.org 已提交
800 801
        break;
      case DB_TEST:
802 803 804
        table_options_.block_size = 256;
        options_.table_factory.reset(
            new BlockBasedTableFactory(table_options_));
J
jorlow@chromium.org 已提交
805 806 807
        constructor_ = new DBConstructor(options_.comparator);
        break;
    }
L
Lei Jin 已提交
808
    ioptions_ = ImmutableCFOptions(options_);
809
    moptions_ = MutableCFOptions(options_);
J
jorlow@chromium.org 已提交
810 811
  }

812
  ~HarnessTest() override { delete constructor_; }
J
jorlow@chromium.org 已提交
813 814 815 816 817 818 819

  void Add(const std::string& key, const std::string& value) {
    constructor_->Add(key, value);
  }

  void Test(Random* rnd) {
    std::vector<std::string> keys;
820
    stl_wrappers::KVMap data;
821
    constructor_->Finish(options_, ioptions_, moptions_, table_options_,
L
Lei Jin 已提交
822
                         *internal_comparator_, &keys, &data);
J
jorlow@chromium.org 已提交
823 824

    TestForwardScan(keys, data);
825 826 827
    if (support_prev_) {
      TestBackwardScan(keys, data);
    }
J
jorlow@chromium.org 已提交
828 829 830
    TestRandomAccess(rnd, keys, data);
  }

A
Andrew Kryczka 已提交
831
  void TestForwardScan(const std::vector<std::string>& /*keys*/,
832
                       const stl_wrappers::KVMap& data) {
S
sdong 已提交
833
    InternalIterator* iter = constructor_->NewIterator();
J
jorlow@chromium.org 已提交
834 835
    ASSERT_TRUE(!iter->Valid());
    iter->SeekToFirst();
836 837
    for (stl_wrappers::KVMap::const_iterator model_iter = data.begin();
         model_iter != data.end(); ++model_iter) {
J
jorlow@chromium.org 已提交
838 839 840 841
      ASSERT_EQ(ToString(data, model_iter), ToString(iter));
      iter->Next();
    }
    ASSERT_TRUE(!iter->Valid());
842
    if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
S
sdong 已提交
843
      iter->~InternalIterator();
844 845 846
    } else {
      delete iter;
    }
J
jorlow@chromium.org 已提交
847 848
  }

A
Andrew Kryczka 已提交
849
  void TestBackwardScan(const std::vector<std::string>& /*keys*/,
850
                        const stl_wrappers::KVMap& data) {
S
sdong 已提交
851
    InternalIterator* iter = constructor_->NewIterator();
J
jorlow@chromium.org 已提交
852 853
    ASSERT_TRUE(!iter->Valid());
    iter->SeekToLast();
854 855
    for (stl_wrappers::KVMap::const_reverse_iterator model_iter = data.rbegin();
         model_iter != data.rend(); ++model_iter) {
J
jorlow@chromium.org 已提交
856 857 858 859
      ASSERT_EQ(ToString(data, model_iter), ToString(iter));
      iter->Prev();
    }
    ASSERT_TRUE(!iter->Valid());
860
    if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
S
sdong 已提交
861
      iter->~InternalIterator();
862 863 864
    } else {
      delete iter;
    }
J
jorlow@chromium.org 已提交
865 866
  }

867 868
  void TestRandomAccess(Random* rnd, const std::vector<std::string>& keys,
                        const stl_wrappers::KVMap& data) {
J
jorlow@chromium.org 已提交
869
    static const bool kVerbose = false;
S
sdong 已提交
870
    InternalIterator* iter = constructor_->NewIterator();
J
jorlow@chromium.org 已提交
871
    ASSERT_TRUE(!iter->Valid());
872
    stl_wrappers::KVMap::const_iterator model_iter = data.begin();
J
jorlow@chromium.org 已提交
873 874
    if (kVerbose) fprintf(stderr, "---\n");
    for (int i = 0; i < 200; i++) {
875
      const int toss = rnd->Uniform(support_prev_ ? 5 : 3);
J
jorlow@chromium.org 已提交
876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932
      switch (toss) {
        case 0: {
          if (iter->Valid()) {
            if (kVerbose) fprintf(stderr, "Next\n");
            iter->Next();
            ++model_iter;
            ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          }
          break;
        }

        case 1: {
          if (kVerbose) fprintf(stderr, "SeekToFirst\n");
          iter->SeekToFirst();
          model_iter = data.begin();
          ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          break;
        }

        case 2: {
          std::string key = PickRandomKey(rnd, keys);
          model_iter = data.lower_bound(key);
          if (kVerbose) fprintf(stderr, "Seek '%s'\n",
                                EscapeString(key).c_str());
          iter->Seek(Slice(key));
          ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          break;
        }

        case 3: {
          if (iter->Valid()) {
            if (kVerbose) fprintf(stderr, "Prev\n");
            iter->Prev();
            if (model_iter == data.begin()) {
              model_iter = data.end();   // Wrap around to invalid value
            } else {
              --model_iter;
            }
            ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          }
          break;
        }

        case 4: {
          if (kVerbose) fprintf(stderr, "SeekToLast\n");
          iter->SeekToLast();
          if (keys.empty()) {
            model_iter = data.end();
          } else {
            std::string last = data.rbegin()->first;
            model_iter = data.lower_bound(last);
          }
          ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          break;
        }
      }
    }
933
    if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
S
sdong 已提交
934
      iter->~InternalIterator();
935 936 937
    } else {
      delete iter;
    }
J
jorlow@chromium.org 已提交
938 939
  }

940 941
  std::string ToString(const stl_wrappers::KVMap& data,
                       const stl_wrappers::KVMap::const_iterator& it) {
J
jorlow@chromium.org 已提交
942 943 944 945 946 947 948
    if (it == data.end()) {
      return "END";
    } else {
      return "'" + it->first + "->" + it->second + "'";
    }
  }

949 950
  std::string ToString(const stl_wrappers::KVMap& data,
                       const stl_wrappers::KVMap::const_reverse_iterator& it) {
J
jorlow@chromium.org 已提交
951 952 953 954 955 956 957
    if (it == data.rend()) {
      return "END";
    } else {
      return "'" + it->first + "->" + it->second + "'";
    }
  }

S
sdong 已提交
958
  std::string ToString(const InternalIterator* it) {
J
jorlow@chromium.org 已提交
959 960 961 962 963 964 965 966 967 968 969
    if (!it->Valid()) {
      return "END";
    } else {
      return "'" + it->key().ToString() + "->" + it->value().ToString() + "'";
    }
  }

  std::string PickRandomKey(Random* rnd, const std::vector<std::string>& keys) {
    if (keys.empty()) {
      return "foo";
    } else {
970
      const int index = rnd->Uniform(static_cast<int>(keys.size()));
J
jorlow@chromium.org 已提交
971
      std::string result = keys[index];
972
      switch (rnd->Uniform(support_prev_ ? 3 : 1)) {
J
jorlow@chromium.org 已提交
973 974 975 976 977
        case 0:
          // Return an existing key
          break;
        case 1: {
          // Attempt to return something smaller than an existing key
978 979 980 981 982
          if (result.size() > 0 && result[result.size() - 1] > '\0'
              && (!only_support_prefix_seek_
                  || options_.prefix_extractor->Transform(result).size()
                  < result.size())) {
            result[result.size() - 1]--;
J
jorlow@chromium.org 已提交
983 984
          }
          break;
985
      }
J
jorlow@chromium.org 已提交
986 987 988 989 990 991 992 993 994 995
        case 2: {
          // Return something larger than an existing key
          Increment(options_.comparator, &result);
          break;
        }
      }
      return result;
    }
  }

A
Abhishek Kona 已提交
996
  // Returns nullptr if not running against a DB
J
jorlow@chromium.org 已提交
997 998
  DB* db() const { return constructor_->db(); }

999 1000 1001 1002
  void RandomizedHarnessTest(size_t part, size_t total) {
    std::vector<TestArgs> args = GenerateArgList();
    assert(part);
    assert(part <= total);
T
Tamir Duberstein 已提交
1003
    for (size_t i = 0; i < args.size(); i++) {
1004 1005 1006
      if ((i % total) + 1 != part) {
        continue;
      }
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020
      Init(args[i]);
      Random rnd(test::RandomSeed() + 5);
      for (int num_entries = 0; num_entries < 2000;
           num_entries += (num_entries < 50 ? 1 : 200)) {
        for (int e = 0; e < num_entries; e++) {
          std::string v;
          Add(test::RandomKey(&rnd, rnd.Skewed(4)),
              test::RandomString(&rnd, rnd.Skewed(5), &v).ToString());
        }
        Test(&rnd);
      }
    }
  }

J
jorlow@chromium.org 已提交
1021
 private:
1022
  Options options_ = Options();
L
Lei Jin 已提交
1023
  ImmutableCFOptions ioptions_;
1024
  MutableCFOptions moptions_;
1025
  BlockBasedTableOptions table_options_ = BlockBasedTableOptions();
J
jorlow@chromium.org 已提交
1026
  Constructor* constructor_;
1027
  WriteBufferManager write_buffer_;
1028 1029
  bool support_prev_;
  bool only_support_prefix_seek_;
1030
  std::shared_ptr<InternalKeyComparator> internal_comparator_;
J
jorlow@chromium.org 已提交
1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043
};

// Returns true when `val` lies in the closed interval [low, high]. When it
// does not, the value and the bounds are reported on stderr before returning
// false.
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  if (low <= val && val <= high) {
    return true;
  }
  fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
          static_cast<unsigned long long>(val),
          static_cast<unsigned long long>(low),
          static_cast<unsigned long long>(high));
  return false;
}

K
Kai Liu 已提交
1044
// Tests against all kinds of tables
I
Igor Sugak 已提交
1045
class TableTest : public testing::Test {
1046 1047 1048 1049 1050 1051 1052 1053 1054
 public:
  const InternalKeyComparator& GetPlainInternalComparator(
      const Comparator* comp) {
    if (!plain_internal_comparator) {
      plain_internal_comparator.reset(
          new test::PlainInternalKeyComparator(comp));
    }
    return *plain_internal_comparator;
  }
M
Maysam Yabandeh 已提交
1055
  void IndexTest(BlockBasedTableOptions table_options);
1056 1057 1058 1059 1060 1061

 private:
  std::unique_ptr<InternalKeyComparator> plain_internal_comparator;
};

// Fixture that adds nothing of its own to TableTest.
class GeneralTableTest : public TableTest {};
1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073
// Value-parameterized fixture for block-based table tests. The uint32_t test
// parameter is captured at construction and used as the table format version.
class BlockBasedTableTest
    : public TableTest,
      virtual public ::testing::WithParamInterface<uint32_t> {
 public:
  BlockBasedTableTest() : format_(GetParam()) {}

  // Returns default BlockBasedTableOptions with format_version set to this
  // instantiation's parameter.
  BlockBasedTableOptions GetBlockBasedTableOptions() {
    BlockBasedTableOptions table_opts;
    table_opts.format_version = format_;
    return table_opts;
  }

 protected:
  // Defined outside the class body.
  uint64_t IndexUncompressedHelper(bool compressed);

 private:
  uint32_t format_;  // value of GetParam() at construction time
};
1080
// Fixture that adds nothing of its own to TableTest.
class PlainTableTest : public TableTest {};
I
Igor Sugak 已提交
1081
// Empty fixture derived directly from testing::Test (no TableTest helpers);
// used by TEST_F(TablePropertyTest, PrefixScanTest).
class TablePropertyTest : public testing::Test {};
1082
// Fixture that adds nothing of its own to TableTest.
class BBTTailPrefetchTest : public TableTest {};
1083

1084 1085 1086 1087 1088
// Instantiate the BlockBasedTableTest parameterized tests twice: once with
// test::kDefaultFormatVersion and once with test::kLatestFormatVersion. The
// fixture reads the value through GetParam() and uses it as the table
// format_version.
INSTANTIATE_TEST_CASE_P(FormatDef, BlockBasedTableTest,
                        testing::Values(test::kDefaultFormatVersion));
INSTANTIATE_TEST_CASE_P(FormatLatest, BlockBasedTableTest,
                        testing::Values(test::kLatestFormatVersion));

1089 1090
// This test serves as the living tutorial for the prefix scan of user collected
// properties.
I
Igor Sugak 已提交
1091
// Demonstrates a prefix scan over UserCollectedProperties: lower_bound() on
// the prefix yields the first candidate, and iteration continues while the
// key still starts with that prefix.
TEST_F(TablePropertyTest, PrefixScanTest) {
  UserCollectedProperties props{
      {"num.111.1", "1"}, {"num.111.2", "2"}, {"num.111.3", "3"},
      {"num.333.1", "1"}, {"num.333.2", "2"}, {"num.333.3", "3"},
      {"num.555.1", "1"}, {"num.555.2", "2"}, {"num.555.3", "3"},
  };

  // Prefixes that exist: each one must yield exactly three entries whose
  // keys end in .1, .2, .3 and whose values are "1", "2", "3".
  for (const std::string& prefix : {"num.111", "num.333", "num.555"}) {
    int matched = 0;
    auto pos = props.lower_bound(prefix);
    while (pos != props.end() &&
           pos->first.compare(0, prefix.size(), prefix) == 0) {
      ++matched;
      const std::string expected_key = prefix + "." + ToString(matched);
      ASSERT_EQ(expected_key, pos->first);
      ASSERT_EQ(ToString(matched), pos->second);
      ++pos;
    }
    ASSERT_EQ(3, matched);
  }

  // Prefixes that don't exist: lower_bound() must land either on end() or on
  // a key that does not start with the prefix.
  for (const std::string& prefix :
       {"num.000", "num.222", "num.444", "num.666"}) {
    const auto pos = props.lower_bound(prefix);
    ASSERT_TRUE(pos == props.end() ||
                pos->first.compare(0, prefix.size(), prefix) != 0);
  }
}
J
jorlow@chromium.org 已提交
1125

K
Kai Liu 已提交
1126 1127
// This test includes all the basic checks except those for index size and
// block size, which will be conducted in separate unit tests.
1128
// Builds an uncompressed block-based table from nine two-byte keys with
// four-byte values and checks the basic table properties (entry count, raw
// key/value sizes, number of data blocks, filter policy name, data size).
TEST_P(BlockBasedTableTest, BasicBlockBasedTableProperties) {
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);

  const std::pair<const char*, const char*> kEntries[] = {
      {"a1", "val1"}, {"b2", "val2"}, {"c3", "val3"},
      {"d4", "val4"}, {"e5", "val5"}, {"f6", "val6"},
      {"g7", "val7"}, {"h8", "val8"}, {"j9", "val9"},
  };
  for (const auto& entry : kEntries) {
    c.Add(entry.first, entry.second);
  }
  uint64_t diff_internal_user_bytes = 9 * 8;  // 8 is seq size, 9 k-v totally

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;

  Options options;
  options.compression = kNoCompression;
  options.statistics = CreateDBStatistics();
  options.statistics->set_stats_level(StatsLevel::kAll);

  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_restart_interval = 1;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  ImmutableCFOptions ioptions(options);
  MutableCFOptions moptions(options);
  ioptions.statistics = options.statistics.get();
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
  ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_NOT_COMPRESSED), 0);

  auto& props = *c.GetTableReader()->GetTableProperties();
  ASSERT_EQ(kvmap.size(), props.num_entries);

  // Every user key is 2 bytes and every value 4 bytes.
  auto raw_key_size = kvmap.size() * 2ul;
  auto raw_value_size = kvmap.size() * 4ul;

  ASSERT_EQ(raw_key_size + diff_internal_user_bytes, props.raw_key_size);
  ASSERT_EQ(raw_value_size, props.raw_value_size);
  ASSERT_EQ(1ul, props.num_data_blocks);
  ASSERT_EQ("", props.filter_policy_name);  // no filter policy is used

  // Verify data size: rebuild the block from the user-key map with the same
  // restart interval, then add the block trailer and the internal-key bytes.
  BlockBuilder block_builder(1);
  for (const auto& kv : kvmap) {
    block_builder.Add(kv.first, kv.second);
  }
  Slice content = block_builder.Finish();
  ASSERT_EQ(content.size() + kBlockTrailerSize + diff_internal_user_bytes,
            props.data_size);
  c.ResetTableReader();
}

Y
Yi Wu 已提交
1181
#ifdef SNAPPY
1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194
uint64_t BlockBasedTableTest::IndexUncompressedHelper(bool compressed) {
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  constexpr size_t kNumKeys = 10000;

  for (size_t k = 0; k < kNumKeys; ++k) {
    c.Add("key" + ToString(k), "val" + ToString(k));
  }

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  options.compression = kSnappyCompression;
  options.statistics = CreateDBStatistics();
1195
  options.statistics->set_stats_level(StatsLevel::kAll);
1196
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1197 1198 1199 1200 1201
  table_options.block_restart_interval = 1;
  table_options.enable_index_compression = compressed;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  ImmutableCFOptions ioptions(options);
1202
  MutableCFOptions moptions(options);
1203
  ioptions.statistics = options.statistics.get();
1204
  c.Finish(options, ioptions, moptions, table_options,
1205 1206 1207 1208
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
  c.ResetTableReader();
  return options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED);
}
1209
// Turning index compression on must account for exactly one additional
// compressed block compared with the same table built without it.
TEST_P(BlockBasedTableTest, IndexUncompressed) {
  const uint64_t with_index_compression = IndexUncompressedHelper(true);
  const uint64_t without_index_compression = IndexUncompressedHelper(false);
  // The compressed-index build should include 1 extra (index) block.
  EXPECT_EQ(without_index_compression + 1, with_index_compression);
}
Y
Yi Wu 已提交
1215
#endif  // SNAPPY
1216

1217
// Checks the name-valued table properties (comparator, merge operator, prefix
// extractor, property collectors, filter policy, compression) first with
// default options and then with each of them explicitly configured.
TEST_P(BlockBasedTableTest, BlockBasedTableProperties2) {
  TableConstructor c(&reverse_key_comparator);
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;

  // Pass 1: default options apart from disabled compression.
  {
    Options options;
    options.compression = CompressionType::kNoCompression;
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));

    const ImmutableCFOptions ioptions(options);
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
             GetPlainInternalComparator(options.comparator), &keys, &kvmap);

    const auto& props = *c.GetTableReader()->GetTableProperties();

    // Default comparator
    ASSERT_EQ("leveldb.BytewiseComparator", props.comparator_name);
    // No merge operator
    ASSERT_EQ("nullptr", props.merge_operator_name);
    // No prefix extractor
    ASSERT_EQ("nullptr", props.prefix_extractor_name);
    // No property collectors
    ASSERT_EQ("[]", props.property_collectors_names);
    // No filter policy is used
    ASSERT_EQ("", props.filter_policy_name);
    // Compression type == that set:
    ASSERT_EQ("NoCompression", props.compression_name);
    c.ResetTableReader();
  }

  // Pass 2: reverse comparator, merge operator, prefix extractor and two
  // property collectors configured.
  {
    Options options;
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    options.comparator = &reverse_key_comparator;
    options.merge_operator = MergeOperators::CreateUInt64AddOperator();
    options.prefix_extractor.reset(NewNoopTransform());
    options.table_properties_collector_factories.emplace_back(
        new DummyPropertiesCollectorFactory1());
    options.table_properties_collector_factories.emplace_back(
        new DummyPropertiesCollectorFactory2());

    const ImmutableCFOptions ioptions(options);
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
             GetPlainInternalComparator(options.comparator), &keys, &kvmap);

    const auto& props = *c.GetTableReader()->GetTableProperties();

    ASSERT_EQ("rocksdb.ReverseBytewiseComparator", props.comparator_name);
    ASSERT_EQ("UInt64AddOperator", props.merge_operator_name);
    ASSERT_EQ("rocksdb.Noop", props.prefix_extractor_name);
    ASSERT_EQ("[DummyPropertiesCollector1,DummyPropertiesCollector2]",
              props.property_collectors_names);
    ASSERT_EQ("", props.filter_policy_name);  // no filter policy is used
    c.ResetTableReader();
  }
}

1279
// Writes two range tombstones into a table and verifies the tombstones
// returned by the range-tombstone iterator, both while the table reader is
// alive and after it has been reset.
TEST_P(BlockBasedTableTest, RangeDelBlock) {
  TableConstructor c(BytewiseComparator());
  std::vector<std::string> keys = {"1pika", "2chu"};
  std::vector<std::string> vals = {"p", "c"};

  // Tombstones expected from the iterator, in iteration order.
  std::vector<RangeTombstone> expected_tombstones = {
      {"1pika", "2chu", 0},
      {"2chu", "c", 1},
      {"2chu", "c", 0},
      {"c", "p", 0},
  };

  // Tombstone i covers [keys[i], vals[i]) at sequence number i.
  for (int i = 0; i < 2; i++) {
    RangeTombstone tombstone(keys[i], vals[i], i);
    std::pair<InternalKey, Slice> serialized = tombstone.Serialize();
    c.Add(serialized.first.Encode().ToString(), serialized.second);
  }

  std::vector<std::string> sorted_keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  options.compression = kNoCompression;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_restart_interval = 1;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  std::unique_ptr<InternalKeyComparator> internal_cmp(
      new InternalKeyComparator(options.comparator));
  c.Finish(options, ioptions, moptions, table_options, *internal_cmp,
           &sorted_keys, &kvmap);

  for (int pass = 0; pass < 2; ++pass) {
    std::unique_ptr<InternalIterator> iter(
        c.GetTableReader()->NewRangeTombstoneIterator(ReadOptions()));
    if (pass > 0) {
      // For second iteration, delete the table reader object and verify the
      // iterator can still access its metablock's range tombstones.
      c.ResetTableReader();
    }
    ASSERT_FALSE(iter->Valid());
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    for (size_t i = 0; i < expected_tombstones.size(); i++) {
      ASSERT_TRUE(iter->Valid());
      ParsedInternalKey parsed_key;
      ASSERT_TRUE(ParseInternalKey(iter->key(), &parsed_key));
      RangeTombstone actual(parsed_key, iter->value());
      const auto& expected = expected_tombstones[i];
      ASSERT_EQ(actual.start_key_, expected.start_key_);
      ASSERT_EQ(actual.end_key_, expected.end_key_);
      ASSERT_EQ(actual.seq_, expected.seq_);
      iter->Next();
    }
    ASSERT_TRUE(!iter->Valid());
  }
}

1338
// A table built with a bloom filter policy must record that policy's name in
// its table properties.
TEST_P(BlockBasedTableTest, FilterPolicyNameProperties) {
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  c.Add("a1", "val1");

  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));

  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);

  const auto& props = *c.GetTableReader()->GetTableProperties();
  ASSERT_EQ("rocksdb.BuiltinBloomFilter", props.filter_policy_name);
  c.ResetTableReader();
}

1357 1358 1359 1360
//
// BlockBasedTableTest::PrefetchTest
//
void AssertKeysInCache(BlockBasedTable* table_reader,
1361
                       const std::vector<std::string>& keys_in_cache,
1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379
                       const std::vector<std::string>& keys_not_in_cache,
                       bool convert = false) {
  if (convert) {
    for (auto key : keys_in_cache) {
      InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
      ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
    }
    for (auto key : keys_not_in_cache) {
      InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
      ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
    }
  } else {
    for (auto key : keys_in_cache) {
      ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key));
    }
    for (auto key : keys_not_in_cache) {
      ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key));
    }
1380 1381 1382 1383
  }
}

void PrefetchRange(TableConstructor* c, Options* opt,
1384
                   BlockBasedTableOptions* table_options, const char* key_begin,
1385 1386 1387
                   const char* key_end,
                   const std::vector<std::string>& keys_in_cache,
                   const std::vector<std::string>& keys_not_in_cache,
1388 1389
                   const Status expected_status = Status::OK()) {
  // reset the cache and reopen the table
1390
  table_options->block_cache = NewLRUCache(16 * 1024 * 1024, 4);
1391 1392
  opt->table_factory.reset(NewBlockBasedTableFactory(*table_options));
  const ImmutableCFOptions ioptions2(*opt);
1393 1394
  const MutableCFOptions moptions(*opt);
  ASSERT_OK(c->Reopen(ioptions2, moptions));
1395 1396 1397

  // prefetch
  auto* table_reader = dynamic_cast<BlockBasedTable*>(c->GetTableReader());
1398
  Status s;
1399 1400
  std::unique_ptr<Slice> begin, end;
  std::unique_ptr<InternalKey> i_begin, i_end;
1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418
  if (key_begin != nullptr) {
    if (c->ConvertToInternalKey()) {
      i_begin.reset(new InternalKey(key_begin, kMaxSequenceNumber, kTypeValue));
      begin.reset(new Slice(i_begin->Encode()));
    } else {
      begin.reset(new Slice(key_begin));
    }
  }
  if (key_end != nullptr) {
    if (c->ConvertToInternalKey()) {
      i_end.reset(new InternalKey(key_end, kMaxSequenceNumber, kTypeValue));
      end.reset(new Slice(i_end->Encode()));
    } else {
      end.reset(new Slice(key_end));
    }
  }
  s = table_reader->Prefetch(begin.get(), end.get());

1419 1420 1421
  ASSERT_TRUE(s.code() == expected_status.code());

  // assert our expectation in cache warmup
1422 1423
  AssertKeysInCache(table_reader, keys_in_cache, keys_not_in_cache,
                    c->ConvertToInternalKey());
1424
  c->ResetTableReader();
1425 1426
}

1427
// Exercises the prefetching operation built into BlockBasedTable over a
// variety of key ranges and checks which keys end up in the block cache.
TEST_P(BlockBasedTableTest, PrefetchTest) {
  Options opt;
  opt.compression = kNoCompression;
  std::unique_ptr<InternalKeyComparator> ikc(
      new test::PlainInternalKeyComparator(opt.comparator));

  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_size = 1024;
  // The cache is sized generously so that cached values are never lost.
  table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
  opt.table_factory.reset(NewBlockBasedTableFactory(table_options));

  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  c.Add("k01", "hello");
  c.Add("k02", "hello2");
  c.Add("k03", std::string(10000, 'x'));
  c.Add("k04", std::string(200000, 'x'));
  c.Add("k05", std::string(300000, 'x'));
  c.Add("k06", "hello3");
  c.Add("k07", std::string(100000, 'x'));

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableCFOptions ioptions(opt);
  const MutableCFOptions moptions(opt);
  c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
  c.ResetTableReader();

  // Resulting layout of the table:
  //
  // Data block         Index
  // ========================
  // [ k01 k02 k03 ]    k03
  // [ k04         ]    k04
  // [ k05         ]    k05
  // [ k06 k07     ]    k07

  // Named key groups shared by the expectations below.
  using KeyList = std::vector<std::string>;
  const KeyList none;
  const KeyList first_block = {"k01", "k02", "k03"};
  const KeyList after_first_block = {"k04", "k05", "k06", "k07"};
  const KeyList through_k05 = {"k01", "k02", "k03", "k04", "k05"};
  const KeyList last_block = {"k06", "k07"};
  const KeyList every_key = {"k01", "k02", "k03", "k04",
                             "k05", "k06", "k07"};

  // Simple ranges.
  PrefetchRange(&c, &opt, &table_options, "k01", "k05", through_k05,
                last_block);
  PrefetchRange(&c, &opt, &table_options, "k01", "k01", first_block,
                after_first_block);

  // Odd ranges: bounds that are not keys of the table.
  PrefetchRange(&c, &opt, &table_options, "a", "z", every_key, none);
  PrefetchRange(&c, &opt, &table_options, "k00", "k00", first_block,
                after_first_block);

  // Edge cases.
  PrefetchRange(&c, &opt, &table_options, "k00", "k06", every_key, none);
  PrefetchRange(&c, &opt, &table_options, "k00", "zzz", every_key, none);

  // Null bounds.
  PrefetchRange(&c, &opt, &table_options, nullptr, nullptr, every_key, none);
  PrefetchRange(&c, &opt, &table_options, "k04", nullptr, after_first_block,
                first_block);
  PrefetchRange(&c, &opt, &table_options, nullptr, "k05", through_k05,
                last_block);

  // Invalid range: begin is past end.
  PrefetchRange(&c, &opt, &table_options, "k06", "k00", none, none,
                Status::InvalidArgument(Slice("k06 "), Slice("k07")));

  c.ResetTableReader();
}

1495 1496
// Verifies that iterating with ReadOptions::total_order_seek = true returns
// keys in full comparator order for each index configuration exercised below
// (binary search, hash search in several variants, two-level, and binary
// search with first key).
TEST_P(BlockBasedTableTest, TotalOrderSeekOnHashIndex) {
  // NOTE: table_options is intentionally shared across iterations, so a field
  // set by an earlier case stays set for the later ones.
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  for (int i = 0; i <= 5; ++i) {
    Options options;
    // Make each key/value an individual block
    table_options.block_size = 64;
    switch (i) {
      case 0:
        // Binary search index
        table_options.index_type = BlockBasedTableOptions::kBinarySearch;
        break;
      case 1:
        // Hash search index
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        options.prefix_extractor.reset(NewFixedPrefixTransform(4));
        break;
      case 2:
        // Hash search index with hash_index_allow_collision
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        table_options.hash_index_allow_collision = true;
        options.prefix_extractor.reset(NewFixedPrefixTransform(4));
        break;
      case 3:
        // Hash search index with filter policy
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        table_options.filter_policy.reset(NewBloomFilterPolicy(10));
        options.prefix_extractor.reset(NewFixedPrefixTransform(4));
        break;
      case 4:
        // Two-level index
        table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
        break;
      case 5:
        // Binary search with first key
        table_options.index_type =
            BlockBasedTableOptions::kBinarySearchWithFirstKey;
        break;
    }
    // Every case installs a factory built from the final table_options, so
    // this is done once here instead of being repeated in each case.
    options.table_factory.reset(new BlockBasedTableFactory(table_options));

    TableConstructor c(BytewiseComparator(),
                       true /* convert_to_internal_key_ */);
    // std::string's fill constructor is (count, char). The previous
    // std::string('a', 56) had the arguments swapped and built 97 copies of
    // '8' rather than the intended 56-byte value of 'a'.
    const std::string value(56, 'a');
    c.Add("aaaa1", value);
    c.Add("bbaa1", value);
    c.Add("cccc1", value);
    c.Add("bbbb1", value);
    c.Add("baaa1", value);
    c.Add("abbb1", value);
    c.Add("cccc2", value);
    std::vector<std::string> keys;
    stl_wrappers::KVMap kvmap;
    const ImmutableCFOptions ioptions(options);
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
             GetPlainInternalComparator(options.comparator), &keys, &kvmap);
    auto props = c.GetTableReader()->GetTableProperties();
    // One data block per key/value pair.
    ASSERT_EQ(7u, props->num_data_blocks);
    auto* reader = c.GetTableReader();
    ReadOptions ro;
    ro.total_order_seek = true;
    std::unique_ptr<InternalIterator> iter(reader->NewIterator(
        ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));

    // Each seek target is a strict prefix of the key it should land on; the
    // following Next() must cross into the next key in total order even when
    // that key has a different 4-byte prefix.
    iter->Seek(InternalKey("b", 0, kTypeValue).Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("baaa1", ExtractUserKey(iter->key()).ToString());
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());

    iter->Seek(InternalKey("bb", 0, kTypeValue).Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());

    iter->Seek(InternalKey("bbb", 0, kTypeValue).Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("cccc1", ExtractUserKey(iter->key()).ToString());
  }
}

1593 1594
// Seeks a single-entry table that uses a no-op prefix transform together with
// a bloom filter, once with total-order seek enabled and once without, and
// expects the entry to be found both times.
TEST_P(BlockBasedTableTest, NoopTransformSeek) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));

  Options options;
  options.comparator = BytewiseComparator();
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewNoopTransform());
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  const InternalKeyComparator internal_comparator(options.comparator);

  // The user key must be a single byte so that the index key exactly matches
  // the user key; that is what tickles the PrefixMayMatch bug.
  InternalKey key("a", 1, kTypeValue);
  TableConstructor c(options.comparator);
  c.Add(key.Encode().ToString(), "b");

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);

  auto* reader = c.GetTableReader();
  // First pass uses total-order seek, second pass uses prefix seek.
  for (const bool use_total_order_seek : {true, false}) {
    ReadOptions ro;
    ro.total_order_seek = use_total_order_seek;
    std::unique_ptr<InternalIterator> iter(reader->NewIterator(
        ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));

    iter->Seek(key.Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("a", ExtractUserKey(iter->key()).ToString());
  }
}

1631
// If the table is reopened with a prefix extractor of a different name than
// the one it was built with, the prefix bloom filter must be skipped when the
// file is read, so lookups still find the stored key.
TEST_P(BlockBasedTableTest, SkipPrefixBloomFilter) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.filter_policy.reset(NewBloomFilterPolicy(2));
  table_options.whole_key_filtering = false;

  Options options;
  options.comparator = BytewiseComparator();
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  // The table is built with a 1-byte fixed prefix extractor.
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));

  TableConstructor c(options.comparator);
  InternalKey key("abcdefghijk", 1, kTypeValue);
  c.Add(key.Encode().ToString(), "test");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  const InternalKeyComparator internal_comparator(options.comparator);
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);
  // TODO(Zhongyi): update test to use MutableCFOptions
  // Reopen with a different (9-byte) prefix extractor.
  options.prefix_extractor.reset(NewFixedPrefixTransform(9));
  const ImmutableCFOptions new_ioptions(options);
  const MutableCFOptions new_moptions(options);
  // Reopen() returns a Status; check it so a failed reopen is reported here
  // rather than surfacing later through a stale or missing reader.
  ASSERT_OK(c.Reopen(new_ioptions, new_moptions));
  auto reader = c.GetTableReader();
  std::unique_ptr<InternalIterator> db_iter(reader->NewIterator(
      ReadOptions(), new_moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // Test point lookup
  // only one kv
  for (auto& kv : kvmap) {
    db_iter->Seek(kv.first);
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_OK(db_iter->status());
    ASSERT_EQ(db_iter->key(), kv.first);
    ASSERT_EQ(db_iter->value(), kv.second);
  }
}

K
Kai Liu 已提交
1674 1675 1676 1677 1678 1679
// Convenience wrapper around test::RandomString() that returns the generated
// string by value instead of filling an out-parameter.
static std::string RandomString(Random* rnd, int len) {
  std::string generated;
  test::RandomString(rnd, len, &generated);
  return generated;
}

1680
void AddInternalKey(TableConstructor* c, const std::string& prefix,
1681
                    std::string value = "v", int /*suffix_len*/ = 800) {
1682 1683
  static Random rnd(1023);
  InternalKey k(prefix + RandomString(&rnd, 800), 0, kTypeValue);
1684
  c->Add(k.Encode().ToString(), value);
1685 1686
}

M
Maysam Yabandeh 已提交
1687
// Shared body for the index tests: builds a ten-key table laid out in five
// data blocks using the caller-supplied index configuration, then checks
// seeks for (a) bare prefixes, (b) every existing key, (c) keys past the end
// of each prefix and (d) prefixes that do not exist in the table.
//
// table_options is taken by value; block_size and block_cache are overridden
// here, everything else (notably index_type) comes from the caller.
void TableTest::IndexTest(BlockBasedTableOptions table_options) {
  TableConstructor c(BytewiseComparator());

  // The prefix extractor below uses the first 3 bytes of each key. Each
  // key/value is made big enough that two of them fill one block; the blank
  // lines group the keys by the block they are expected to land in.
  AddInternalKey(&c, "0015");
  AddInternalKey(&c, "0035");

  AddInternalKey(&c, "0054");
  AddInternalKey(&c, "0055");

  AddInternalKey(&c, "0056");
  AddInternalKey(&c, "0057");

  AddInternalKey(&c, "0058");
  AddInternalKey(&c, "0075");

  AddInternalKey(&c, "0076");
  AddInternalKey(&c, "0095");

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  table_options.block_size = 1700;
  table_options.block_cache = NewLRUCache(1024, 4);
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
  auto reader = c.GetTableReader();

  auto props = reader->GetTableProperties();
  ASSERT_EQ(5u, props->num_data_blocks);

  // TODO(Zhongyi): update test to use MutableCFOptions
  std::unique_ptr<InternalIterator> index_iter(reader->NewIterator(
      ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // -- Seek to targets that do not exist but share a prefix with stored keys.
  std::vector<std::string> prefixes = {"001", "003", "005", "007", "009"};
  // lower_bound[i] is the first stored key carrying prefixes[i].
  std::vector<std::string> lower_bound = {keys[0], keys[1], keys[2],
                                          keys[7], keys[9], };

  // find the lower bound of the prefix
  for (size_t i = 0; i < prefixes.size(); ++i) {
    index_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode());
    ASSERT_OK(index_iter->status());
    ASSERT_TRUE(index_iter->Valid());

    // seek the first element in the block
    ASSERT_EQ(lower_bound[i], index_iter->key().ToString());
    ASSERT_EQ("v", index_iter->value().ToString());
  }

  // upper_bound[i] is the first stored key after every key with prefixes[i];
  // the last prefix has no such key, hence one entry fewer.
  std::vector<std::string> upper_bound = {keys[1], keys[2], keys[7], keys[9], };

  // find existing keys
  for (const auto& item : kvmap) {
    // The iterator works on internal keys, so the seek target must be the
    // full internal key. The previous code passed only the extracted user
    // key, which is not a valid internal-key target.
    index_iter->Seek(item.first);

    ASSERT_OK(index_iter->status());
    ASSERT_TRUE(index_iter->Valid());

    ASSERT_EQ(item.first, index_iter->key().ToString());
    ASSERT_EQ(item.second, index_iter->value().ToString());
  }

  for (size_t i = 0; i < prefixes.size(); ++i) {
    // the key is greater than any existing keys.
    auto key = prefixes[i] + "9";
    index_iter->Seek(InternalKey(key, 0, kTypeValue).Encode());

    ASSERT_OK(index_iter->status());
    if (i == prefixes.size() - 1) {
      // last key
      ASSERT_TRUE(!index_iter->Valid());
    } else {
      ASSERT_TRUE(index_iter->Valid());
      // seek the first element in the block
      ASSERT_EQ(upper_bound[i], index_iter->key().ToString());
      ASSERT_EQ("v", index_iter->value().ToString());
    }
  }

  // find keys with prefix that don't match any of the existing prefixes.
  std::vector<std::string> non_exist_prefixes = {"002", "004", "006", "008"};
  for (const auto& prefix : non_exist_prefixes) {
    index_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode());

    ASSERT_OK(index_iter->status());
    // Seek to non-existing prefixes should yield either invalid, or a
    // key with prefix greater than the target.
    if (index_iter->Valid()) {
      Slice ukey = ExtractUserKey(index_iter->key());
      Slice ukey_prefix = options.prefix_extractor->Transform(ukey);
      ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) < 0);
    }
  }
  c.ResetTableReader();
}

1800 1801
// Runs the shared IndexTest checks against the binary-search index.
TEST_P(BlockBasedTableTest, BinaryIndexTest) {
  BlockBasedTableOptions binary_index_options = GetBlockBasedTableOptions();
  binary_index_options.index_type = BlockBasedTableOptions::kBinarySearch;
  IndexTest(binary_index_options);
}

1806 1807
// Runs the shared IndexTest checks against the hash-search index.
TEST_P(BlockBasedTableTest, HashIndexTest) {
  BlockBasedTableOptions hash_index_options = GetBlockBasedTableOptions();
  hash_index_options.index_type = BlockBasedTableOptions::kHashSearch;
  IndexTest(hash_index_options);
}

1812
// Runs the shared IndexTest checks against the two-level index for every
// metadata_block_size from 1 up to one past the estimated maximum index size.
TEST_P(BlockBasedTableTest, PartitionIndexTest) {
  const int max_index_keys = 5;
  const int est_max_index_key_value_size = 32;
  const int est_max_index_size = max_index_keys * est_max_index_key_value_size;
  const int largest_block_size = est_max_index_size + 1;

  for (int block_size = 1; block_size <= largest_block_size; ++block_size) {
    BlockBasedTableOptions partitioned_options = GetBlockBasedTableOptions();
    partitioned_options.index_type =
        BlockBasedTableOptions::kTwoLevelIndexSearch;
    partitioned_options.metadata_block_size = block_size;
    IndexTest(partitioned_options);
  }
}

1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844
// With read_tier = kBlockCacheTier, a seek to the only key in the table must
// leave the iterator invalid with an Incomplete status, and repeating the
// same seek must behave the same way.
TEST_P(BlockBasedTableTest, IndexSeekOptimizationIncomplete) {
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);

  // Build a table holding a single key.
  TableConstructor c(BytewiseComparator());
  AddInternalKey(&c, "pika");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
  ASSERT_EQ(1, keys.size());

  // Restrict reads to the block-cache tier.
  ReadOptions ropt;
  ropt.read_tier = ReadTier::kBlockCacheTier;
  auto reader = c.GetTableReader();
  std::unique_ptr<InternalIterator> iter(reader->NewIterator(
      ropt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  auto ikey = [](Slice user_key) {
    return InternalKey(user_key, 0, kTypeValue).Encode().ToString();
  };

  // The second identical seek used to crash at some point.
  for (int attempt = 0; attempt < 2; ++attempt) {
    iter->Seek(ikey("pika"));
    ASSERT_FALSE(iter->Valid());
    ASSERT_TRUE(iter->status().IsIncomplete());
  }
}

1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142
// Runs the shared IndexTest checks against the binary-search-with-first-key
// index.
TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey1) {
  BlockBasedTableOptions first_key_options = GetBlockBasedTableOptions();
  first_key_options.index_type =
      BlockBasedTableOptions::kBinarySearchWithFirstKey;
  IndexTest(first_key_options);
}

// Flush policy that cuts data blocks after a caller-specified number of keys.
// The same class serves as both the factory and the policy: the factory
// hands out fresh policy instances that carry a copy of the per-block key
// counts.
class CustomFlushBlockPolicy : public FlushBlockPolicyFactory,
                               public FlushBlockPolicy {
 public:
  // keys_per_block[i] is the number of keys that block i should hold.
  explicit CustomFlushBlockPolicy(std::vector<int> keys_per_block)
      : keys_per_block_(keys_per_block) {}

  const char* Name() const override { return "table_test"; }

  // Returns a new policy with its own counters; both arguments are unused.
  FlushBlockPolicy* NewFlushBlockPolicy(const BlockBasedTableOptions&,
                                        const BlockBuilder&) const override {
    return new CustomFlushBlockPolicy(keys_per_block_);
  }

  // Returns true when the current block already holds its configured number
  // of keys, i.e. the incoming key should start the next block. The key and
  // value themselves are ignored. at() throws if more blocks are produced
  // than keys_per_block_ has entries.
  bool Update(const Slice&, const Slice&) override {
    const bool block_is_full =
        keys_in_current_block_ >= keys_per_block_.at(current_block_idx_);
    if (!block_is_full) {
      ++keys_in_current_block_;
      return false;
    }

    // The incoming key becomes the first key of the next block.
    ++current_block_idx_;
    keys_in_current_block_ = 1;
    return true;
  }

  std::vector<int> keys_per_block_;

  int current_block_idx_ = 0;
  int keys_in_current_block_ = 0;
};

// Walks an iterator over an eight-key, four-block table and, after each
// operation, checks the BLOCK_CACHE_DATA_MISS / BLOCK_CACHE_DATA_HIT tickers
// to pin down exactly when data blocks are read. The whole scenario runs
// twice: once with kBinarySearch and once with kBinarySearchWithFirstKey.
// Expectations written as `use_first_key ? a : b` are the places where the
// two index types are expected to differ.
//
// The tickers are cumulative, so every expected count depends on all
// operations that precede it; the statement order below is significant.
TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey2) {
  for (int use_first_key = 0; use_first_key < 2; ++use_first_key) {
    SCOPED_TRACE("use_first_key = " + std::to_string(use_first_key));
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
    table_options.index_type =
        use_first_key ? BlockBasedTableOptions::kBinarySearchWithFirstKey
                      : BlockBasedTableOptions::kBinarySearch;
    table_options.block_cache = NewLRUCache(10000);  // fits all blocks
    table_options.index_shortening =
        BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
    // Block boundaries are dictated by key count (2, 1, 3, 2 keys per block)
    // rather than by size.
    table_options.flush_block_policy_factory =
        std::make_shared<CustomFlushBlockPolicy>(std::vector<int>{2, 1, 3, 2});
    Options options;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    // Fresh statistics per iteration, so ticker counts start from zero.
    options.statistics = CreateDBStatistics();
    Statistics* stats = options.statistics.get();
    std::unique_ptr<InternalKeyComparator> comparator(
        new InternalKeyComparator(BytewiseComparator()));
    const ImmutableCFOptions ioptions(options);
    const MutableCFOptions moptions(options);

    TableConstructor c(BytewiseComparator());

    // Block 0.
    AddInternalKey(&c, "aaaa", "v0");
    AddInternalKey(&c, "aaac", "v1");

    // Block 1.
    AddInternalKey(&c, "aaca", "v2");

    // Block 2.
    AddInternalKey(&c, "caaa", "v3");
    AddInternalKey(&c, "caac", "v4");
    AddInternalKey(&c, "caae", "v5");

    // Block 3.
    AddInternalKey(&c, "ccaa", "v6");
    AddInternalKey(&c, "ccac", "v7");

    // Write the file.
    std::vector<std::string> keys;
    stl_wrappers::KVMap kvmap;
    c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
             &kvmap);
    ASSERT_EQ(8, keys.size());

    auto reader = c.GetTableReader();
    auto props = reader->GetTableProperties();
    ASSERT_EQ(4u, props->num_data_blocks);
    std::unique_ptr<InternalIterator> iter(reader->NewIterator(
        ReadOptions(), /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));

    // Shouldn't have read data blocks before iterator is seeked.
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Wraps a user key into an encoded internal key (sequence 0, kTypeValue)
    // for use as a seek target.
    auto ikey = [](Slice user_key) {
      return InternalKey(user_key, 0, kTypeValue).Encode().ToString();
    };

    // Seek to a key between blocks. If index contains first key, we shouldn't
    // read any data blocks until value is requested.
    iter->Seek(ikey("aaba"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[2], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 0 : 1,
              stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ("v2", iter->value().ToString());
    EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to the middle of a block. The block should be read right away.
    iter->Seek(ikey("caab"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[4], iter->key().ToString());
    EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    EXPECT_EQ("v4", iter->value().ToString());
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to just before the same block and don't access value.
    // The iterator should keep pinning the block contents.
    iter->Seek(ikey("baaa"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[3], iter->key().ToString());
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to the same block again to check that the block is still pinned.
    iter->Seek(ikey("caae"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[5], iter->key().ToString());
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    EXPECT_EQ("v5", iter->value().ToString());
    EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Step forward and fall through to the next block. Don't access value.
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[6], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 3,
              stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Step forward again. Block should be read.
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[7], iter->key().ToString());
    EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ("v7", iter->value().ToString());
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Step forward and reach the end.
    iter->Next();
    EXPECT_FALSE(iter->Valid());
    EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to a single-key block and step forward without accessing value.
    iter->Seek(ikey("aaca"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[2], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 0 : 1,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[3], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 1 : 2,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    EXPECT_EQ("v3", iter->value().ToString());
    EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

    // Seek between blocks and step back without accessing value.
    iter->Seek(ikey("aaca"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[2], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 3,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

    iter->Prev();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[1], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 3,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    // All blocks are in cache now, there'll be no more misses ever.
    EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ("v1", iter->value().ToString());

    // Next into the next block again.
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[2], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 4,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to first and step back without accessing value.
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[0], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 5,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    iter->Prev();
    EXPECT_FALSE(iter->Valid());
    EXPECT_EQ(use_first_key ? 2 : 5,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Do some SeekForPrev() and SeekToLast() just to cover all methods.
    iter->SeekForPrev(ikey("caad"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[4], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 3 : 6,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    EXPECT_EQ("v4", iter->value().ToString());
    EXPECT_EQ(use_first_key ? 3 : 6,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    iter->SeekToLast();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[7], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 4 : 7,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    EXPECT_EQ("v7", iter->value().ToString());
    EXPECT_EQ(use_first_key ? 4 : 7,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Final tally: each of the four data blocks was missed exactly once.
    EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

    c.ResetTableReader();
  }
}

// Builds a two-key table through a constructor configured with
// largest_seqno = 42, using the binary-search-with-first-key index, and
// checks that the key returned by the iterator carries sequence number 42
// both before and after the data block has been read.
TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKeyGlobalSeqno) {
  Options options;
  options.statistics = CreateDBStatistics();
  Statistics* stats = options.statistics.get();

  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey;
  table_options.block_cache = NewLRUCache(10000);
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);

  TableConstructor c(BytewiseComparator(), /* convert_to_internal_key */ false,
                     /* level */ -1, /* largest_seqno */ 42);

  // Both keys are written with sequence number 0.
  c.Add(InternalKey("b", 0, kTypeValue).Encode().ToString(), "x");
  c.Add(InternalKey("c", 0, kTypeValue).Encode().ToString(), "y");

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
  ASSERT_EQ(2, keys.size());

  auto reader = c.GetTableReader();
  auto props = reader->GetTableProperties();
  ASSERT_EQ(1u, props->num_data_blocks);
  std::unique_ptr<InternalIterator> iter(reader->NewIterator(
      ReadOptions(), /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // The iterator is expected to report "b" with sequence number 42, which
  // differs from the key as it was originally added.
  const std::string key_with_global_seqno =
      InternalKey("b", 42, kTypeValue).Encode().ToString();

  iter->Seek(InternalKey("a", 0, kTypeValue).Encode().ToString());
  ASSERT_TRUE(iter->Valid());
  EXPECT_EQ(key_with_global_seqno, iter->key().ToString());
  EXPECT_NE(keys[0], iter->key().ToString());
  // Key should have been served from index, without reading data blocks.
  EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

  // Requesting the value reads the data block; the key must stay the same.
  EXPECT_EQ("x", iter->value().ToString());
  EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
  EXPECT_EQ(key_with_global_seqno, iter->key().ToString());

  c.ResetTableReader();
}

K
Kai Liu 已提交
2143 2144 2145
// It's very hard to figure out the index block size of a block accurately.
// To make sure we get the index size, we just make sure as key number
// grows, the filter block size also grows.
2146
TEST_P(BlockBasedTableTest, IndexSizeStat) {
K
Kai Liu 已提交
2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161
  uint64_t last_index_size = 0;

  // we need to use random keys since the pure human readable texts
  // may be well compressed, resulting insignifcant change of index
  // block size.
  Random rnd(test::RandomSeed());
  std::vector<std::string> keys;

  for (int i = 0; i < 100; ++i) {
    keys.push_back(RandomString(&rnd, 10000));
  }

  // Each time we load one more key to the table. the table index block
  // size is expected to be larger than last time's.
  for (size_t i = 1; i < keys.size(); ++i) {
2162 2163
    TableConstructor c(BytewiseComparator(),
                       true /* convert_to_internal_key_ */);
K
Kai Liu 已提交
2164 2165 2166 2167 2168
    for (size_t j = 0; j < i; ++j) {
      c.Add(keys[j], "val");
    }

    std::vector<std::string> ks;
2169
    stl_wrappers::KVMap kvmap;
2170
    Options options;
K
Kai Liu 已提交
2171
    options.compression = kNoCompression;
2172
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2173 2174
    table_options.block_restart_interval = 1;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
K
Kai Liu 已提交
2175

L
Lei Jin 已提交
2176
    const ImmutableCFOptions ioptions(options);
2177 2178
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
2179
             GetPlainInternalComparator(options.comparator), &ks, &kvmap);
2180
    auto index_size = c.GetTableReader()->GetTableProperties()->index_size;
K
Kai Liu 已提交
2181 2182
    ASSERT_GT(index_size, last_index_size);
    last_index_size = index_size;
2183
    c.ResetTableReader();
K
Kai Liu 已提交
2184 2185 2186
  }
}

2187
// Verifies that the `num_data_blocks` table property equals the number of
// entries when every entry is sized to fill roughly one data block.
TEST_P(BlockBasedTableTest, NumBlockStat) {
  Random rnd(test::RandomSeed());
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  Options options;
  options.compression = kNoCompression;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_restart_interval = 1;
  table_options.block_size = 1000;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  for (int i = 0; i < 10; ++i) {
    // the key/val are slightly smaller than block size, so that each block
    // holds roughly one key/value pair.
    c.Add(RandomString(&rnd, 900), "val");
  }

  std::vector<std::string> ks;
  stl_wrappers::KVMap kvmap;
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &ks, &kvmap);
  // One data block is expected per key/value pair that ended up in the table.
  ASSERT_EQ(kvmap.size(),
            c.GetTableReader()->GetTableProperties()->num_data_blocks);
  c.ResetTableReader();
}

2214 2215
// A simple tool that takes the snapshot of block cache statistics.
class BlockCachePropertiesSnapshot {
K
Kai Liu 已提交
2216
 public:
2217
  explicit BlockCachePropertiesSnapshot(Statistics* statistics) {
I
Igor Canadi 已提交
2218 2219 2220 2221 2222 2223
    block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS);
    block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT);
    index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS);
    index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT);
    data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS);
    data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT);
2224 2225 2226
    filter_block_cache_miss =
        statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS);
    filter_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT);
2227 2228 2229
    block_cache_bytes_read = statistics->getTickerCount(BLOCK_CACHE_BYTES_READ);
    block_cache_bytes_write =
        statistics->getTickerCount(BLOCK_CACHE_BYTES_WRITE);
2230 2231
  }

I
Igor Canadi 已提交
2232 2233 2234 2235
  void AssertIndexBlockStat(int64_t expected_index_block_cache_miss,
                            int64_t expected_index_block_cache_hit) {
    ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
    ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
2236 2237
  }

I
Igor Canadi 已提交
2238 2239 2240 2241
  void AssertFilterBlockStat(int64_t expected_filter_block_cache_miss,
                             int64_t expected_filter_block_cache_hit) {
    ASSERT_EQ(expected_filter_block_cache_miss, filter_block_cache_miss);
    ASSERT_EQ(expected_filter_block_cache_hit, filter_block_cache_hit);
K
Kai Liu 已提交
2242 2243
  }

K
kailiu 已提交
2244
  // Check if the fetched props matches the expected ones.
2245
  // TODO(kailiu) Use this only when you disabled filter policy!
I
Igor Canadi 已提交
2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257
  void AssertEqual(int64_t expected_index_block_cache_miss,
                   int64_t expected_index_block_cache_hit,
                   int64_t expected_data_block_cache_miss,
                   int64_t expected_data_block_cache_hit) const {
    ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
    ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
    ASSERT_EQ(expected_data_block_cache_miss, data_block_cache_miss);
    ASSERT_EQ(expected_data_block_cache_hit, data_block_cache_hit);
    ASSERT_EQ(expected_index_block_cache_miss + expected_data_block_cache_miss,
              block_cache_miss);
    ASSERT_EQ(expected_index_block_cache_hit + expected_data_block_cache_hit,
              block_cache_hit);
K
Kai Liu 已提交
2258 2259
  }

2260 2261 2262 2263
  int64_t GetCacheBytesRead() { return block_cache_bytes_read; }

  int64_t GetCacheBytesWrite() { return block_cache_bytes_write; }

K
Kai Liu 已提交
2264
 private:
2265 2266 2267 2268 2269 2270
  int64_t block_cache_miss = 0;
  int64_t block_cache_hit = 0;
  int64_t index_block_cache_miss = 0;
  int64_t index_block_cache_hit = 0;
  int64_t data_block_cache_miss = 0;
  int64_t data_block_cache_hit = 0;
2271 2272
  int64_t filter_block_cache_miss = 0;
  int64_t filter_block_cache_hit = 0;
2273 2274
  int64_t block_cache_bytes_read = 0;
  int64_t block_cache_bytes_write = 0;
K
Kai Liu 已提交
2275 2276
};

2277 2278
// Make sure, by default, index/filter blocks were pre-loaded (meaning we won't
// use block cache to store them).
2279
TEST_P(BlockBasedTableTest, BlockCacheDisabledTest) {
2280 2281 2282
  Options options;
  options.create_if_missing = true;
  options.statistics = CreateDBStatistics();
2283
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2284
  table_options.block_cache = NewLRUCache(1024, 4);
2285
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));
2286 2287
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  std::vector<std::string> keys;
2288
  stl_wrappers::KVMap kvmap;
2289

2290
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2291
  c.Add("key", "value");
L
Lei Jin 已提交
2292
  const ImmutableCFOptions ioptions(options);
2293 2294
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
2295
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2296 2297

  // preloading filter/index blocks is enabled.
2298
  auto reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2299
  ASSERT_FALSE(reader->TEST_FilterBlockInCache());
2300
  ASSERT_FALSE(reader->TEST_IndexBlockInCache());
2301 2302 2303 2304 2305 2306 2307 2308 2309

  {
    // nothing happens in the beginning
    BlockCachePropertiesSnapshot props(options.statistics.get());
    props.AssertIndexBlockStat(0, 0);
    props.AssertFilterBlockStat(0, 0);
  }

  {
2310
    GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2311
                           GetContext::kNotFound, Slice(), nullptr, nullptr,
A
Andrew Kryczka 已提交
2312
                           nullptr, nullptr, nullptr);
2313
    // a hack that just to trigger BlockBasedTable::GetFilter.
2314 2315
    reader->Get(ReadOptions(), "non-exist-key", &get_context,
                moptions.prefix_extractor.get());
2316 2317 2318 2319 2320 2321 2322 2323
    BlockCachePropertiesSnapshot props(options.statistics.get());
    props.AssertIndexBlockStat(0, 0);
    props.AssertFilterBlockStat(0, 0);
  }
}

// Due to the difficulities of the intersaction between statistics, this test
// only tests the case when "index block is put to block cache"
2324
TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) {
K
Kai Liu 已提交
2325
  // -- Table construction
2326
  Options options;
K
Kai Liu 已提交
2327
  options.create_if_missing = true;
2328
  options.statistics = CreateDBStatistics();
2329 2330

  // Enable the cache for index/filter blocks
2331
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2332
  table_options.block_cache = NewLRUCache(2048, 2);
2333 2334
  table_options.cache_index_and_filter_blocks = true;
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
K
Kai Liu 已提交
2335
  std::vector<std::string> keys;
2336
  stl_wrappers::KVMap kvmap;
K
Kai Liu 已提交
2337

2338
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
K
Kai Liu 已提交
2339
  c.Add("key", "value");
L
Lei Jin 已提交
2340
  const ImmutableCFOptions ioptions(options);
2341 2342
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
2343
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2344
  // preloading filter/index blocks is prohibited.
2345
  auto* reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2346
  ASSERT_FALSE(reader->TEST_FilterBlockInCache());
2347
  ASSERT_TRUE(reader->TEST_IndexBlockInCache());
K
Kai Liu 已提交
2348 2349 2350

  // -- PART 1: Open with regular block cache.
  // Since block_cache is disabled, no cache activities will be involved.
2351
  std::unique_ptr<InternalIterator> iter;
K
Kai Liu 已提交
2352

2353
  int64_t last_cache_bytes_read = 0;
K
Kai Liu 已提交
2354 2355
  // At first, no block will be accessed.
  {
2356
    BlockCachePropertiesSnapshot props(options.statistics.get());
K
Kai Liu 已提交
2357
    // index will be added to block cache.
2358 2359
    props.AssertEqual(1,  // index block miss
                      0, 0, 0);
2360 2361 2362 2363
    ASSERT_EQ(props.GetCacheBytesRead(), 0);
    ASSERT_EQ(props.GetCacheBytesWrite(),
              table_options.block_cache->GetUsage());
    last_cache_bytes_read = props.GetCacheBytesRead();
K
Kai Liu 已提交
2364 2365 2366 2367
  }

  // Only index block will be accessed
  {
2368
    iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
2369
    BlockCachePropertiesSnapshot props(options.statistics.get());
K
Kai Liu 已提交
2370 2371 2372
    // NOTE: to help better highlight the "detla" of each ticker, I use
    // <last_value> + <added_value> to indicate the increment of changed
    // value; other numbers remain the same.
2373 2374
    props.AssertEqual(1, 0 + 1,  // index block hit
                      0, 0);
2375 2376 2377 2378 2379
    // Cache hit, bytes read from cache should increase
    ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
    ASSERT_EQ(props.GetCacheBytesWrite(),
              table_options.block_cache->GetUsage());
    last_cache_bytes_read = props.GetCacheBytesRead();
K
Kai Liu 已提交
2380 2381 2382 2383 2384
  }

  // Only data block will be accessed
  {
    iter->SeekToFirst();
2385
    BlockCachePropertiesSnapshot props(options.statistics.get());
2386 2387
    props.AssertEqual(1, 1, 0 + 1,  // data block miss
                      0);
2388 2389 2390 2391 2392
    // Cache miss, Bytes read from cache should not change
    ASSERT_EQ(props.GetCacheBytesRead(), last_cache_bytes_read);
    ASSERT_EQ(props.GetCacheBytesWrite(),
              table_options.block_cache->GetUsage());
    last_cache_bytes_read = props.GetCacheBytesRead();
K
Kai Liu 已提交
2393 2394 2395 2396
  }

  // Data block will be in cache
  {
2397
    iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
K
Kai Liu 已提交
2398
    iter->SeekToFirst();
2399
    BlockCachePropertiesSnapshot props(options.statistics.get());
2400 2401
    props.AssertEqual(1, 1 + 1, /* index block hit */
                      1, 0 + 1 /* data block hit */);
2402 2403 2404 2405
    // Cache hit, bytes read from cache should increase
    ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
    ASSERT_EQ(props.GetCacheBytesWrite(),
              table_options.block_cache->GetUsage());
K
Kai Liu 已提交
2406 2407 2408
  }
  // release the iterator so that the block cache can reset correctly.
  iter.reset();
2409

2410 2411
  c.ResetTableReader();

2412
  // -- PART 2: Open with very small block cache
K
Kai Liu 已提交
2413 2414
  // In this test, no block will ever get hit since the block cache is
  // too small to fit even one entry.
2415
  table_options.block_cache = NewLRUCache(1, 4);
2416
  options.statistics = CreateDBStatistics();
2417
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
L
Lei Jin 已提交
2418
  const ImmutableCFOptions ioptions2(options);
2419 2420
  const MutableCFOptions moptions2(options);
  c.Reopen(ioptions2, moptions2);
K
Kai Liu 已提交
2421
  {
2422
    BlockCachePropertiesSnapshot props(options.statistics.get());
2423 2424
    props.AssertEqual(1,  // index block miss
                      0, 0, 0);
2425 2426
    // Cache miss, Bytes read from cache should not change
    ASSERT_EQ(props.GetCacheBytesRead(), 0);
K
Kai Liu 已提交
2427 2428 2429 2430 2431 2432
  }

  {
    // Both index and data block get accessed.
    // It first cache index block then data block. But since the cache size
    // is only 1, index block will be purged after data block is inserted.
2433
    iter.reset(c.NewIterator(moptions2.prefix_extractor.get()));
2434
    BlockCachePropertiesSnapshot props(options.statistics.get());
2435 2436 2437
    props.AssertEqual(1 + 1,  // index block miss
                      0, 0,   // data block miss
                      0);
2438 2439
    // Cache hit, bytes read from cache should increase
    ASSERT_EQ(props.GetCacheBytesRead(), 0);
K
Kai Liu 已提交
2440 2441 2442 2443 2444 2445
  }

  {
    // SeekToFirst() accesses data block. With similar reason, we expect data
    // block's cache miss.
    iter->SeekToFirst();
2446
    BlockCachePropertiesSnapshot props(options.statistics.get());
2447 2448
    props.AssertEqual(2, 0, 0 + 1,  // data block miss
                      0);
2449 2450
    // Cache miss, Bytes read from cache should not change
    ASSERT_EQ(props.GetCacheBytesRead(), 0);
K
Kai Liu 已提交
2451
  }
2452
  iter.reset();
2453
  c.ResetTableReader();
2454 2455

  // -- PART 3: Open table with bloom filter enabled but not in SST file
2456
  table_options.block_cache = NewLRUCache(4096, 4);
2457 2458 2459 2460
  table_options.cache_index_and_filter_blocks = false;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  TableConstructor c3(BytewiseComparator());
L
Lei Jin 已提交
2461 2462 2463
  std::string user_key = "k01";
  InternalKey internal_key(user_key, 0, kTypeValue);
  c3.Add(internal_key.Encode().ToString(), "hello");
2464
  ImmutableCFOptions ioptions3(options);
2465
  MutableCFOptions moptions3(options);
2466
  // Generate table without filter policy
2467
  c3.Finish(options, ioptions3, moptions3, table_options,
2468 2469 2470
            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
  c3.ResetTableReader();

2471 2472 2473
  // Open table with filter policy
  table_options.filter_policy.reset(NewBloomFilterPolicy(1));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
2474
  options.statistics = CreateDBStatistics();
2475
  ImmutableCFOptions ioptions4(options);
2476 2477
  MutableCFOptions moptions4(options);
  ASSERT_OK(c3.Reopen(ioptions4, moptions4));
2478
  reader = dynamic_cast<BlockBasedTable*>(c3.GetTableReader());
2479
  ASSERT_FALSE(reader->TEST_FilterBlockInCache());
M
Maysam Yabandeh 已提交
2480
  PinnableSlice value;
2481
  GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2482
                         GetContext::kNotFound, user_key, &value, nullptr,
A
Andrew Kryczka 已提交
2483
                         nullptr, nullptr, nullptr);
M
Maysam Yabandeh 已提交
2484
  ASSERT_OK(reader->Get(ReadOptions(), internal_key.Encode(), &get_context,
2485
                        moptions4.prefix_extractor.get()));
M
Maysam Yabandeh 已提交
2486
  ASSERT_STREQ(value.data(), "hello");
2487 2488
  BlockCachePropertiesSnapshot props(options.statistics.get());
  props.AssertFilterBlockStat(0, 0);
2489
  c3.ResetTableReader();
K
Kai Liu 已提交
2490 2491
}

2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515
void ValidateBlockSizeDeviation(int value, int expected) {
  BlockBasedTableOptions table_options;
  table_options.block_size_deviation = value;
  BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);

  const BlockBasedTableOptions* normalized_table_options =
      (const BlockBasedTableOptions*)factory->GetOptions();
  ASSERT_EQ(normalized_table_options->block_size_deviation, expected);

  delete factory;
}

void ValidateBlockRestartInterval(int value, int expected) {
  BlockBasedTableOptions table_options;
  table_options.block_restart_interval = value;
  BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);

  const BlockBasedTableOptions* normalized_table_options =
      (const BlockBasedTableOptions*)factory->GetOptions();
  ASSERT_EQ(normalized_table_options->block_restart_interval, expected);

  delete factory;
}

2516
// Checks how out-of-range table option values are sanitized by the factory.
TEST_P(BlockBasedTableTest, InvalidOptions) {
  // invalid values for block_size_deviation (<0 or >100) are silently set to 0
  ValidateBlockSizeDeviation(-10, 0);
  ValidateBlockSizeDeviation(-1, 0);
  ValidateBlockSizeDeviation(0, 0);
  ValidateBlockSizeDeviation(1, 1);
  ValidateBlockSizeDeviation(99, 99);
  ValidateBlockSizeDeviation(100, 100);
  ValidateBlockSizeDeviation(101, 0);
  ValidateBlockSizeDeviation(1000, 0);

  // invalid values for block_restart_interval (<1) are silently set to 1
  ValidateBlockRestartInterval(-10, 1);
  ValidateBlockRestartInterval(-1, 1);
  ValidateBlockRestartInterval(0, 1);
  ValidateBlockRestartInterval(1, 1);
  ValidateBlockRestartInterval(2, 2);
  ValidateBlockRestartInterval(1000, 1000);
}

2536
// Checks the perf-context block read counters for Get() on an existing and a
// non-existing key, across both filter types and with index/filter blocks
// either cached or not.
TEST_P(BlockBasedTableTest, BlockReadCountTest) {
  // bloom_filter_type = 0 -- block-based filter
  // bloom_filter_type = 1 -- full filter
  for (int bloom_filter_type = 0; bloom_filter_type < 2; ++bloom_filter_type) {
    for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2;
         ++index_and_filter_in_cache) {
      Options options;
      options.create_if_missing = true;

      BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
      table_options.block_cache = NewLRUCache(1, 0);
      table_options.cache_index_and_filter_blocks = index_and_filter_in_cache;
      table_options.filter_policy.reset(
          NewBloomFilterPolicy(10, bloom_filter_type == 0));
      options.table_factory.reset(new BlockBasedTableFactory(table_options));
      std::vector<std::string> keys;
      stl_wrappers::KVMap kvmap;

      TableConstructor c(BytewiseComparator());
      std::string user_key = "k04";
      InternalKey internal_key(user_key, 0, kTypeValue);
      std::string encoded_key = internal_key.Encode().ToString();
      c.Add(encoded_key, "hello");
      ImmutableCFOptions ioptions(options);
      MutableCFOptions moptions(options);
      // Generate table with filter policy
      c.Finish(options, ioptions, moptions, table_options,
               GetPlainInternalComparator(options.comparator), &keys, &kvmap);
      auto reader = c.GetTableReader();
      PinnableSlice value;
      {
        GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                               GetContext::kNotFound, user_key, &value, nullptr,
                               nullptr, nullptr, nullptr);
        get_perf_context()->Reset();
        ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
                              moptions.prefix_extractor.get()));
        if (index_and_filter_in_cache) {
          // data, index and filter block
          ASSERT_EQ(get_perf_context()->block_read_count, 3);
          ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
          ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
        } else {
          // just the data block
          ASSERT_EQ(get_perf_context()->block_read_count, 1);
        }
        ASSERT_EQ(get_context.State(), GetContext::kFound);
        // Compare by length: a slice's data is not guaranteed to be
        // NUL-terminated, so it must not be read as a C string.
        ASSERT_EQ("hello", value.ToString());
      }

      // Get non-existing key
      user_key = "does-not-exist";
      internal_key = InternalKey(user_key, 0, kTypeValue);
      encoded_key = internal_key.Encode().ToString();

      value.Reset();
      {
        GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                               GetContext::kNotFound, user_key, &value, nullptr,
                               nullptr, nullptr, nullptr);
        get_perf_context()->Reset();
        ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
                              moptions.prefix_extractor.get()));
        ASSERT_EQ(get_context.State(), GetContext::kNotFound);
      }

      if (index_and_filter_in_cache) {
        if (bloom_filter_type == 0) {
          // with block-based, we read index and then the filter
          ASSERT_EQ(get_perf_context()->block_read_count, 2);
          ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
          ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
        } else {
          // with full-filter, we read filter first and then we stop
          ASSERT_EQ(get_perf_context()->block_read_count, 1);
          ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
        }
      } else {
        // filter is already in memory and it figures out that the key doesn't
        // exist
        ASSERT_EQ(get_perf_context()->block_read_count, 0);
      }
    }
  }
}

M
Maysam Yabandeh 已提交
2622 2623 2624 2625 2626 2627 2628 2629 2630
// A wrapper around LRICache that also keeps track of data blocks (in contrast
// with the objects) in the cache. The class is very simple and can be used only
// for trivial tests.
class MockCache : public LRUCache {
 public:
  MockCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
            double high_pri_pool_ratio)
      : LRUCache(capacity, num_shard_bits, strict_capacity_limit,
                 high_pri_pool_ratio) {}
2631 2632 2633 2634
  Status Insert(const Slice& key, void* value, size_t charge,
                void (*deleter)(const Slice& key, void* value),
                Handle** handle = nullptr,
                Priority priority = Priority::LOW) override {
M
Maysam Yabandeh 已提交
2635 2636 2637 2638 2639 2640 2641
    // Replace the deleter with our own so that we keep track of data blocks
    // erased from the cache
    deleters_[key.ToString()] = deleter;
    return ShardedCache::Insert(key, value, charge, &MockDeleter, handle,
                                priority);
  }
  // This is called by the application right after inserting a data block
2642
  void TEST_mark_as_data_block(const Slice& key, size_t charge) override {
M
Maysam Yabandeh 已提交
2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669
    marked_data_in_cache_[key.ToString()] = charge;
    marked_size_ += charge;
  }
  using DeleterFunc = void (*)(const Slice& key, void* value);
  static std::map<std::string, DeleterFunc> deleters_;
  static std::map<std::string, size_t> marked_data_in_cache_;
  static size_t marked_size_;
  static void MockDeleter(const Slice& key, void* value) {
    // If the item was marked for being data block, decrease its usage from  the
    // total data block usage of the cache
    if (marked_data_in_cache_.find(key.ToString()) !=
        marked_data_in_cache_.end()) {
      marked_size_ -= marked_data_in_cache_[key.ToString()];
    }
    // Then call the origianl deleter
    assert(deleters_.find(key.ToString()) != deleters_.end());
    auto deleter = deleters_[key.ToString()];
    deleter(key, value);
  }
};

size_t MockCache::marked_size_ = 0;
std::map<std::string, MockCache::DeleterFunc> MockCache::deleters_;
std::map<std::string, size_t> MockCache::marked_data_in_cache_;

// Block cache can contain raw data blocks as well as general objects. If an
// object depends on the table to be live, it then must be destructed before the
M
Maysam Yabandeh 已提交
2670
// table is closed. This test makes sure that the only items remains in the
M
Maysam Yabandeh 已提交
2671
// cache after the table is closed are raw data blocks.
2672
TEST_P(BlockBasedTableTest, NoObjectInCacheAfterTableClose) {
  std::vector<CompressionType> compression_types{kNoCompression};

  // The following are the compression library versions supporting compression
  // dictionaries. See the test case CacheCompressionDict in the
  // DBBlockCacheTest suite.
#ifdef ZLIB
  compression_types.push_back(kZlibCompression);
#endif  // ZLIB
#if LZ4_VERSION_NUMBER >= 10400
  compression_types.push_back(kLZ4Compression);
  compression_types.push_back(kLZ4HCCompression);
#endif  // LZ4_VERSION_NUMBER >= 10400
#if ZSTD_VERSION_NUMBER >= 500
  compression_types.push_back(kZSTD);
#endif  // ZSTD_VERSION_NUMBER >= 500

  for (int level : {-1, 0, 1, 10}) {
    for (auto index_type :
         {BlockBasedTableOptions::IndexType::kBinarySearch,
          BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch}) {
      for (bool block_based_filter : {true, false}) {
        for (bool partition_filter : {true, false}) {
          // Partitioned filters are only exercised together with a two-level
          // index and a non-block-based filter; skip other combinations.
          if (partition_filter &&
              (block_based_filter ||
               index_type !=
                   BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch)) {
            continue;
          }
          for (bool index_and_filter_in_cache : {true, false}) {
            for (bool pin_l0 : {true, false}) {
              for (bool pin_top_level : {true, false}) {
                if (pin_l0 && !index_and_filter_in_cache) {
                  continue;
                }

                for (auto compression_type : compression_types) {
                  for (uint32_t max_dict_bytes : {0, 1 << 14}) {
                    // A dictionary size is only combined with a real
                    // compression type.
                    if (compression_type == kNoCompression && max_dict_bytes) {
                      continue;
                    }

                    // Create a table
                    Options opt;
                    std::unique_ptr<InternalKeyComparator> ikc;
                    ikc.reset(
                        new test::PlainInternalKeyComparator(opt.comparator));
                    opt.compression = compression_type;
                    opt.compression_opts.max_dict_bytes = max_dict_bytes;
                    BlockBasedTableOptions table_options =
                        GetBlockBasedTableOptions();
                    table_options.block_size = 1024;
                    table_options.index_type = index_type;
                    table_options.pin_l0_filter_and_index_blocks_in_cache =
                        pin_l0;
                    table_options.pin_top_level_index_and_filter =
                        pin_top_level;
                    table_options.partition_filters = partition_filter;
                    table_options.cache_index_and_filter_blocks =
                        index_and_filter_in_cache;
                    // big enough so we don't ever lose cached values.
                    table_options.block_cache = std::make_shared<MockCache>(
                        16 * 1024 * 1024, 4, false, 0.0);
                    table_options.filter_policy.reset(
                        rocksdb::NewBloomFilterPolicy(10, block_based_filter));
                    opt.table_factory.reset(
                        NewBlockBasedTableFactory(table_options));

                    bool convert_to_internal_key = false;
                    TableConstructor c(BytewiseComparator(),
                                       convert_to_internal_key, level);
                    std::string user_key = "k01";
                    std::string key = InternalKey(user_key, 0, kTypeValue)
                                          .Encode()
                                          .ToString();
                    c.Add(key, "hello");
                    std::vector<std::string> keys;
                    stl_wrappers::KVMap kvmap;
                    const ImmutableCFOptions ioptions(opt);
                    const MutableCFOptions moptions(opt);
                    c.Finish(opt, ioptions, moptions, table_options, *ikc,
                             &keys, &kvmap);

                    // Doing a read to make index/filter loaded into the cache
                    auto table_reader =
                        dynamic_cast<BlockBasedTable*>(c.GetTableReader());
                    ASSERT_TRUE(table_reader != nullptr);
                    PinnableSlice value;
                    GetContext get_context(
                        opt.comparator, nullptr, nullptr, nullptr,
                        GetContext::kNotFound, user_key, &value, nullptr,
                        nullptr, nullptr, nullptr);
                    auto s =
                        table_reader->Get(ReadOptions(), key, &get_context,
                                          moptions.prefix_extractor.get());
                    ASSERT_OK(s);
                    ASSERT_EQ(get_context.State(), GetContext::kFound);
                    // Compare by length rather than as a C string: a slice is
                    // not guaranteed to be NUL-terminated.
                    ASSERT_EQ("hello", value.ToString());

                    // Close the table
                    c.ResetTableReader();

                    auto usage = table_options.block_cache->GetUsage();
                    auto pinned_usage =
                        table_options.block_cache->GetPinnedUsage();
                    // The only usage must be for marked data blocks
                    ASSERT_EQ(usage, MockCache::marked_size_);
                    // There must be some pinned data since PinnableSlice has
                    // not released them yet
                    ASSERT_GT(pinned_usage, 0);
                    // Release pinnable slice resources
                    value.Reset();
                    pinned_usage = table_options.block_cache->GetPinnedUsage();
                    ASSERT_EQ(pinned_usage, 0);
                  }
                }
              }
            }
          }
        }
      }
    }
  }  // level
}

2792
TEST_P(BlockBasedTableTest, BlockCacheLeak) {
K
Kai Liu 已提交
2793 2794 2795 2796 2797
  // Check that when we reopen a table we don't lose access to blocks already
  // in the cache. This test checks whether the Table actually makes use of the
  // unique ID from the file.

  Options opt;
2798
  std::unique_ptr<InternalKeyComparator> ikc;
2799
  ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
K
Kai Liu 已提交
2800
  opt.compression = kNoCompression;
2801
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2802 2803
  table_options.block_size = 1024;
  // big enough so we don't ever lose cached values.
2804
  table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
2805
  opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
K
Kai Liu 已提交
2806

2807
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
K
Kai Liu 已提交
2808 2809 2810 2811 2812 2813 2814 2815
  c.Add("k01", "hello");
  c.Add("k02", "hello2");
  c.Add("k03", std::string(10000, 'x'));
  c.Add("k04", std::string(200000, 'x'));
  c.Add("k05", std::string(300000, 'x'));
  c.Add("k06", "hello3");
  c.Add("k07", std::string(100000, 'x'));
  std::vector<std::string> keys;
2816
  stl_wrappers::KVMap kvmap;
L
Lei Jin 已提交
2817
  const ImmutableCFOptions ioptions(opt);
2818 2819
  const MutableCFOptions moptions(opt);
  c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
K
Kai Liu 已提交
2820

2821
  std::unique_ptr<InternalIterator> iter(
2822
      c.NewIterator(moptions.prefix_extractor.get()));
K
Kai Liu 已提交
2823 2824 2825 2826 2827 2828 2829
  iter->SeekToFirst();
  while (iter->Valid()) {
    iter->key();
    iter->value();
    iter->Next();
  }
  ASSERT_OK(iter->status());
2830
  iter.reset();
K
Kai Liu 已提交
2831

L
Lei Jin 已提交
2832
  const ImmutableCFOptions ioptions1(opt);
2833 2834
  const MutableCFOptions moptions1(opt);
  ASSERT_OK(c.Reopen(ioptions1, moptions1));
2835
  auto table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
K
Kai Liu 已提交
2836
  for (const std::string& key : keys) {
M
Maysam Yabandeh 已提交
2837 2838
    InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
    ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
K
Kai Liu 已提交
2839
  }
2840
  c.ResetTableReader();
I
Igor Canadi 已提交
2841 2842

  // rerun with different block cache
2843
  table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
2844
  opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
L
Lei Jin 已提交
2845
  const ImmutableCFOptions ioptions2(opt);
2846 2847
  const MutableCFOptions moptions2(opt);
  ASSERT_OK(c.Reopen(ioptions2, moptions2));
2848
  table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
I
Igor Canadi 已提交
2849
  for (const std::string& key : keys) {
M
Maysam Yabandeh 已提交
2850 2851
    InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
    ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
I
Igor Canadi 已提交
2852
  }
2853
  c.ResetTableReader();
K
Kai Liu 已提交
2854 2855
}

2856
namespace {
Y
Yi Wu 已提交
2857
class CustomMemoryAllocator : public MemoryAllocator {
2858
 public:
2859
  const char* Name() const override { return "CustomMemoryAllocator"; }
2860 2861 2862 2863

  void* Allocate(size_t size) override {
    ++numAllocations;
    auto ptr = new char[size + 16];
Y
Yi Wu 已提交
2864
    memcpy(ptr, "memory_allocator_", 16);  // mangle first 16 bytes
2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877
    return reinterpret_cast<void*>(ptr + 16);
  }
  void Deallocate(void* p) override {
    ++numDeallocations;
    char* ptr = reinterpret_cast<char*>(p) - 16;
    delete[] ptr;
  }

  std::atomic<int> numAllocations;
  std::atomic<int> numDeallocations;
};
}  // namespace

Y
Yi Wu 已提交
2878 2879
TEST_P(BlockBasedTableTest, MemoryAllocator) {
  auto custom_memory_allocator = std::make_shared<CustomMemoryAllocator>();
2880 2881
  {
    Options opt;
2882
    std::unique_ptr<InternalKeyComparator> ikc;
2883 2884 2885 2886 2887
    ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
    opt.compression = kNoCompression;
    BlockBasedTableOptions table_options;
    table_options.block_size = 1024;
    LRUCacheOptions lruOptions;
2888
    lruOptions.memory_allocator = custom_memory_allocator;
2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908
    lruOptions.capacity = 16 * 1024 * 1024;
    lruOptions.num_shard_bits = 4;
    table_options.block_cache = NewLRUCache(std::move(lruOptions));
    opt.table_factory.reset(NewBlockBasedTableFactory(table_options));

    TableConstructor c(BytewiseComparator(),
                       true /* convert_to_internal_key_ */);
    c.Add("k01", "hello");
    c.Add("k02", "hello2");
    c.Add("k03", std::string(10000, 'x'));
    c.Add("k04", std::string(200000, 'x'));
    c.Add("k05", std::string(300000, 'x'));
    c.Add("k06", "hello3");
    c.Add("k07", std::string(100000, 'x'));
    std::vector<std::string> keys;
    stl_wrappers::KVMap kvmap;
    const ImmutableCFOptions ioptions(opt);
    const MutableCFOptions moptions(opt);
    c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);

2909
    std::unique_ptr<InternalIterator> iter(
2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921
        c.NewIterator(moptions.prefix_extractor.get()));
    iter->SeekToFirst();
    while (iter->Valid()) {
      iter->key();
      iter->value();
      iter->Next();
    }
    ASSERT_OK(iter->status());
  }

  // out of scope, block cache should have been deleted, all allocations
  // deallocated
Y
Yi Wu 已提交
2922 2923
  EXPECT_EQ(custom_memory_allocator->numAllocations.load(),
            custom_memory_allocator->numDeallocations.load());
2924
  // make sure that allocations actually happened through the cache allocator
Y
Yi Wu 已提交
2925
  EXPECT_GT(custom_memory_allocator->numAllocations.load(), 0);
2926 2927
}

2928 2929
// Plain table is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
I
Igor Sugak 已提交
2930
TEST_F(PlainTableTest, BasicPlainTableProperties) {
S
Stanislau Hlebik 已提交
2931 2932 2933 2934 2935 2936
  PlainTableOptions plain_table_options;
  plain_table_options.user_key_len = 8;
  plain_table_options.bloom_bits_per_key = 8;
  plain_table_options.hash_table_ratio = 0;

  PlainTableFactory factory(plain_table_options);
A
Andres Notzli 已提交
2937
  test::StringSink sink;
2938
  std::unique_ptr<WritableFileWriter> file_writer(
2939
      test::GetWritableFileWriter(new test::StringSink(), "" /* don't care */));
2940
  Options options;
L
Lei Jin 已提交
2941
  const ImmutableCFOptions ioptions(options);
2942
  const MutableCFOptions moptions(options);
2943
  InternalKeyComparator ikc(options.comparator);
2944 2945
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
2946
  std::string column_family_name;
2947
  int unknown_level = -1;
2948
  std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
2949 2950 2951 2952
      TableBuilderOptions(
          ioptions, moptions, ikc, &int_tbl_prop_collector_factories,
          kNoCompression, 0 /* sample_for_compression */, CompressionOptions(),
          false /* skip_filters */, column_family_name, unknown_level),
2953
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
2954
      file_writer.get()));
K
Kai Liu 已提交
2955 2956

  for (char c = 'a'; c <= 'z'; ++c) {
2957 2958
    std::string key(8, c);
    key.append("\1       ");  // PlainTable expects internal key structure
K
Kai Liu 已提交
2959 2960 2961 2962
    std::string value(28, c + 42);
    builder->Add(key, value);
  }
  ASSERT_OK(builder->Finish());
2963
  file_writer->Flush();
K
Kai Liu 已提交
2964

A
Andres Notzli 已提交
2965 2966
  test::StringSink* ss =
    static_cast<test::StringSink*>(file_writer->writable_file());
2967
  std::unique_ptr<RandomAccessFileReader> file_reader(
2968
      test::GetRandomAccessFileReader(
A
Andres Notzli 已提交
2969
          new test::StringSource(ss->contents(), 72242, true)));
K
Kai Liu 已提交
2970

K
kailiu 已提交
2971
  TableProperties* props = nullptr;
2972
  auto s = ReadTableProperties(file_reader.get(), ss->contents().size(),
2973
                               kPlainTableMagicNumber, ioptions,
2974
                               &props, true /* compression_type_missing */);
K
Kai Liu 已提交
2975
  std::unique_ptr<TableProperties> props_guard(props);
K
Kai Liu 已提交
2976 2977
  ASSERT_OK(s);

K
kailiu 已提交
2978 2979 2980 2981 2982 2983
  ASSERT_EQ(0ul, props->index_size);
  ASSERT_EQ(0ul, props->filter_size);
  ASSERT_EQ(16ul * 26, props->raw_key_size);
  ASSERT_EQ(28ul * 26, props->raw_value_size);
  ASSERT_EQ(26ul, props->num_entries);
  ASSERT_EQ(1ul, props->num_data_blocks);
K
Kai Liu 已提交
2984
}
2985
#endif  // !ROCKSDB_LITE
K
Kai Liu 已提交
2986

I
Igor Sugak 已提交
2987
TEST_F(GeneralTableTest, ApproximateOffsetOfPlain) {
2988
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
J
jorlow@chromium.org 已提交
2989 2990 2991 2992 2993 2994 2995 2996
  c.Add("k01", "hello");
  c.Add("k02", "hello2");
  c.Add("k03", std::string(10000, 'x'));
  c.Add("k04", std::string(200000, 'x'));
  c.Add("k05", std::string(300000, 'x'));
  c.Add("k06", "hello3");
  c.Add("k07", std::string(100000, 'x'));
  std::vector<std::string> keys;
2997
  stl_wrappers::KVMap kvmap;
2998
  Options options;
2999
  test::PlainInternalKeyComparator internal_comparator(options.comparator);
J
jorlow@chromium.org 已提交
3000
  options.compression = kNoCompression;
3001 3002
  BlockBasedTableOptions table_options;
  table_options.block_size = 1024;
L
Lei Jin 已提交
3003
  const ImmutableCFOptions ioptions(options);
3004 3005
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
L
Lei Jin 已提交
3006
           &keys, &kvmap);
J
jorlow@chromium.org 已提交
3007 3008 3009 3010 3011 3012 3013

  ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"),      0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"),   10000,  11000));
3014 3015 3016
  // k04 and k05 will be in two consecutive blocks, the index is
  // an arbitrary slice between k04 and k05, either before or after k04a
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 10000, 211000));
J
jorlow@chromium.org 已提交
3017 3018 3019
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"),  210000, 211000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"),  510000, 511000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"),  510000, 511000));
S
Sanjay Ghemawat 已提交
3020
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"),  610000, 612000));
3021
  c.ResetTableReader();
J
jorlow@chromium.org 已提交
3022 3023
}

K
Kai Liu 已提交
3024
static void DoCompressionTest(CompressionType comp) {
J
jorlow@chromium.org 已提交
3025
  Random rnd(301);
3026
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
J
jorlow@chromium.org 已提交
3027 3028 3029 3030 3031 3032
  std::string tmp;
  c.Add("k01", "hello");
  c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
  c.Add("k03", "hello3");
  c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
  std::vector<std::string> keys;
3033
  stl_wrappers::KVMap kvmap;
3034
  Options options;
3035
  test::PlainInternalKeyComparator ikc(options.comparator);
H
heyongqiang 已提交
3036
  options.compression = comp;
3037 3038
  BlockBasedTableOptions table_options;
  table_options.block_size = 1024;
L
Lei Jin 已提交
3039
  const ImmutableCFOptions ioptions(options);
3040 3041
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap);
J
jorlow@chromium.org 已提交
3042 3043 3044 3045

  ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"),       0,      0));
3046 3047 3048
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"),    2000,   3500));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"),    2000,   3500));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"),    4000,   6500));
3049
  c.ResetTableReader();
J
jorlow@chromium.org 已提交
3050 3051
}

I
Igor Sugak 已提交
3052
TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) {
K
kailiu 已提交
3053
  std::vector<CompressionType> compression_state;
I
Igor Canadi 已提交
3054
  if (!Snappy_Supported()) {
H
heyongqiang 已提交
3055 3056
    fprintf(stderr, "skipping snappy compression tests\n");
  } else {
K
kailiu 已提交
3057
    compression_state.push_back(kSnappyCompression);
H
heyongqiang 已提交
3058 3059
  }

I
Igor Canadi 已提交
3060
  if (!Zlib_Supported()) {
H
heyongqiang 已提交
3061 3062
    fprintf(stderr, "skipping zlib compression tests\n");
  } else {
K
kailiu 已提交
3063
    compression_state.push_back(kZlibCompression);
H
heyongqiang 已提交
3064 3065
  }

K
kailiu 已提交
3066 3067
  // TODO(kailiu) DoCompressionTest() doesn't work with BZip2.
  /*
I
Igor Canadi 已提交
3068
  if (!BZip2_Supported()) {
A
Albert Strasheim 已提交
3069 3070
    fprintf(stderr, "skipping bzip2 compression tests\n");
  } else {
K
kailiu 已提交
3071
    compression_state.push_back(kBZip2Compression);
A
Albert Strasheim 已提交
3072
  }
K
kailiu 已提交
3073
  */
A
Albert Strasheim 已提交
3074

I
Igor Canadi 已提交
3075 3076
  if (!LZ4_Supported()) {
    fprintf(stderr, "skipping lz4 and lz4hc compression tests\n");
A
Albert Strasheim 已提交
3077
  } else {
K
kailiu 已提交
3078 3079
    compression_state.push_back(kLZ4Compression);
    compression_state.push_back(kLZ4HCCompression);
H
heyongqiang 已提交
3080 3081
  }

3082 3083 3084 3085 3086 3087 3088
  if (!XPRESS_Supported()) {
    fprintf(stderr, "skipping xpress and xpress compression tests\n");
  }
  else {
    compression_state.push_back(kXpressCompression);
  }

K
kailiu 已提交
3089 3090
  for (auto state : compression_state) {
    DoCompressionTest(state);
H
heyongqiang 已提交
3091 3092 3093
  }
}

3094
#ifndef ROCKSDB_VALGRIND_RUN
3095 3096
// RandomizedHarnessTest is very slow for certain combination of arguments
// Split into 8 pieces to reduce the time individual tests take.
3097 3098
TEST_F(HarnessTest, Randomized1) {
  // part 1 out of 8
3099
  const size_t part = 1;
3100
  const size_t total = 8;
3101 3102 3103
  RandomizedHarnessTest(part, total);
}

3104 3105 3106 3107 3108 3109 3110 3111 3112
TEST_F(HarnessTest, Randomized2) {
  // part 2 out of 8
  const size_t part = 2;
  const size_t total = 8;
  RandomizedHarnessTest(part, total);
}

TEST_F(HarnessTest, Randomized3) {
  // Slice 3 of the 8-way split.
  constexpr size_t kPart = 3;
  constexpr size_t kTotalParts = 8;
  RandomizedHarnessTest(kPart, kTotalParts);
}

TEST_F(HarnessTest, Randomized4) {
  // Slice 4 of the 8-way split.
  constexpr size_t kPart = 4;
  constexpr size_t kTotalParts = 8;
  RandomizedHarnessTest(kPart, kTotalParts);
}

TEST_F(HarnessTest, Randomized5) {
  // Slice 5 of the 8-way split.
  constexpr size_t kPart = 5;
  constexpr size_t kTotalParts = 8;
  RandomizedHarnessTest(kPart, kTotalParts);
}

TEST_F(HarnessTest, Randomized6) {
  // Slice 6 of the 8-way split.
  constexpr size_t kPart = 6;
  constexpr size_t kTotalParts = 8;
  RandomizedHarnessTest(kPart, kTotalParts);
}

TEST_F(HarnessTest, Randomized7) {
  // Slice 7 of the 8-way split.
  constexpr size_t kPart = 7;
  constexpr size_t kTotalParts = 8;
  RandomizedHarnessTest(kPart, kTotalParts);
}

TEST_F(HarnessTest, Randomized8) {
  // Slice 8 of the 8-way split.
  constexpr size_t kPart = 8;
  constexpr size_t kTotalParts = 8;
  RandomizedHarnessTest(kPart, kTotalParts);
}

3153
#ifndef ROCKSDB_LITE
I
Igor Sugak 已提交
3154
TEST_F(HarnessTest, RandomizedLongDB) {
3155
  Random rnd(test::RandomSeed());
3156
  TestArgs args = {DB_TEST, false, 16, kNoCompression, 0, false};
3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176
  Init(args);
  int num_entries = 100000;
  for (int e = 0; e < num_entries; e++) {
    std::string v;
    Add(test::RandomKey(&rnd, rnd.Skewed(4)),
        test::RandomString(&rnd, rnd.Skewed(5), &v).ToString());
  }
  Test(&rnd);

  // We must have created enough data to force merging
  int files = 0;
  for (int level = 0; level < db()->NumberLevels(); level++) {
    std::string value;
    char name[100];
    snprintf(name, sizeof(name), "rocksdb.num-files-at-level%d", level);
    ASSERT_TRUE(db()->GetProperty(name, &value));
    files += atoi(value.c_str());
  }
  ASSERT_GT(files, 0);
}
3177
#endif  // ROCKSDB_LITE
3178
#endif  // ROCKSDB_VALGRIND_RUN
3179

I
Igor Sugak 已提交
3180
class MemTableTest : public testing::Test {};
3181

I
Igor Sugak 已提交
3182
TEST_F(MemTableTest, Simple) {
3183 3184
  InternalKeyComparator cmp(BytewiseComparator());
  auto table_factory = std::make_shared<SkipListFactory>();
I
Igor Canadi 已提交
3185 3186
  Options options;
  options.memtable_factory = table_factory;
3187
  ImmutableCFOptions ioptions(options);
3188
  WriteBufferManager wb(options.db_write_buffer_size);
3189 3190 3191
  MemTable* memtable =
      new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
                   kMaxSequenceNumber, 0 /* column_family_id */);
3192 3193 3194 3195 3196 3197 3198
  memtable->Ref();
  WriteBatch batch;
  WriteBatchInternal::SetSequence(&batch, 100);
  batch.Put(std::string("k1"), std::string("v1"));
  batch.Put(std::string("k2"), std::string("v2"));
  batch.Put(std::string("k3"), std::string("v3"));
  batch.Put(std::string("largekey"), std::string("vlarge"));
3199 3200
  batch.DeleteRange(std::string("chi"), std::string("xigua"));
  batch.DeleteRange(std::string("begin"), std::string("end"));
3201
  ColumnFamilyMemTablesDefault cf_mems_default(memtable);
3202 3203
  ASSERT_TRUE(
      WriteBatchInternal::InsertInto(&batch, &cf_mems_default, nullptr).ok());
3204

3205 3206
  for (int i = 0; i < 2; ++i) {
    Arena arena;
3207 3208 3209 3210 3211 3212 3213
    ScopedArenaIterator arena_iter_guard;
    std::unique_ptr<InternalIterator> iter_guard;
    InternalIterator* iter;
    if (i == 0) {
      iter = memtable->NewIterator(ReadOptions(), &arena);
      arena_iter_guard.set(iter);
    } else {
3214 3215
      iter = memtable->NewRangeTombstoneIterator(
          ReadOptions(), kMaxSequenceNumber /* read_seq */);
3216 3217
      iter_guard.reset(iter);
    }
A
Andrew Kryczka 已提交
3218 3219 3220
    if (iter == nullptr) {
      continue;
    }
3221 3222 3223 3224 3225 3226
    iter->SeekToFirst();
    while (iter->Valid()) {
      fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(),
              iter->value().ToString().c_str());
      iter->Next();
    }
3227 3228
  }

3229
  delete memtable->Unref();
3230 3231
}

3232
// Test the empty key
I
Igor Sugak 已提交
3233
TEST_F(HarnessTest, SimpleEmptyKey) {
K
Kai Liu 已提交
3234 3235 3236
  auto args = GenerateArgList();
  for (const auto& arg : args) {
    Init(arg);
3237 3238 3239 3240 3241 3242
    Random rnd(test::RandomSeed() + 1);
    Add("", "v");
    Test(&rnd);
  }
}

I
Igor Sugak 已提交
3243
TEST_F(HarnessTest, SimpleSingle) {
K
Kai Liu 已提交
3244 3245 3246
  auto args = GenerateArgList();
  for (const auto& arg : args) {
    Init(arg);
3247 3248 3249 3250 3251 3252
    Random rnd(test::RandomSeed() + 2);
    Add("abc", "v");
    Test(&rnd);
  }
}

I
Igor Sugak 已提交
3253
TEST_F(HarnessTest, SimpleMulti) {
K
Kai Liu 已提交
3254 3255 3256
  auto args = GenerateArgList();
  for (const auto& arg : args) {
    Init(arg);
3257 3258 3259 3260 3261 3262 3263 3264
    Random rnd(test::RandomSeed() + 3);
    Add("abc", "v");
    Add("abcd", "v");
    Add("ac", "v2");
    Test(&rnd);
  }
}

I
Igor Sugak 已提交
3265
TEST_F(HarnessTest, SimpleSpecialKey) {
K
Kai Liu 已提交
3266 3267 3268
  auto args = GenerateArgList();
  for (const auto& arg : args) {
    Init(arg);
3269 3270 3271 3272 3273
    Random rnd(test::RandomSeed() + 4);
    Add("\xff\xff", "v3");
    Test(&rnd);
  }
}
3274

I
Igor Sugak 已提交
3275
TEST_F(HarnessTest, FooterTests) {
I
xxHash  
Igor Canadi 已提交
3276 3277 3278
  {
    // upconvert legacy block based
    std::string encoded;
3279
    Footer footer(kLegacyBlockBasedTableMagicNumber, 0);
I
xxHash  
Igor Canadi 已提交
3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292
    BlockHandle meta_index(10, 5), index(20, 15);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    decoded_footer.DecodeFrom(&encoded_slice);
    ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
    ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3293
    ASSERT_EQ(decoded_footer.version(), 0U);
I
xxHash  
Igor Canadi 已提交
3294 3295 3296 3297
  }
  {
    // xxhash block based
    std::string encoded;
3298
    Footer footer(kBlockBasedTableMagicNumber, 1);
I
xxHash  
Igor Canadi 已提交
3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312
    BlockHandle meta_index(10, 5), index(20, 15);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    footer.set_checksum(kxxHash);
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    decoded_footer.DecodeFrom(&encoded_slice);
    ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
    ASSERT_EQ(decoded_footer.checksum(), kxxHash);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3313
    ASSERT_EQ(decoded_footer.version(), 1U);
I
xxHash  
Igor Canadi 已提交
3314
  }
B
Bo Hou 已提交
3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334
  {
    // xxhash64 block based
    std::string encoded;
    Footer footer(kBlockBasedTableMagicNumber, 1);
    BlockHandle meta_index(10, 5), index(20, 15);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    footer.set_checksum(kxxHash64);
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    decoded_footer.DecodeFrom(&encoded_slice);
    ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
    ASSERT_EQ(decoded_footer.checksum(), kxxHash64);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
    ASSERT_EQ(decoded_footer.version(), 1U);
  }
3335 3336
// Plain table is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
I
xxHash  
Igor Canadi 已提交
3337 3338 3339
  {
    // upconvert legacy plain table
    std::string encoded;
3340
    Footer footer(kLegacyPlainTableMagicNumber, 0);
I
xxHash  
Igor Canadi 已提交
3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353
    BlockHandle meta_index(10, 5), index(20, 15);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    decoded_footer.DecodeFrom(&encoded_slice);
    ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
    ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3354
    ASSERT_EQ(decoded_footer.version(), 0U);
I
xxHash  
Igor Canadi 已提交
3355 3356 3357 3358
  }
  {
    // xxhash block based
    std::string encoded;
3359
    Footer footer(kPlainTableMagicNumber, 1);
I
xxHash  
Igor Canadi 已提交
3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373
    BlockHandle meta_index(10, 5), index(20, 15);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    footer.set_checksum(kxxHash);
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    decoded_footer.DecodeFrom(&encoded_slice);
    ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
    ASSERT_EQ(decoded_footer.checksum(), kxxHash);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3374 3375
    ASSERT_EQ(decoded_footer.version(), 1U);
  }
3376
#endif  // !ROCKSDB_LITE
3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394
  {
    // version == 2
    std::string encoded;
    Footer footer(kBlockBasedTableMagicNumber, 2);
    BlockHandle meta_index(10, 5), index(20, 15);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    decoded_footer.DecodeFrom(&encoded_slice);
    ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
    ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
    ASSERT_EQ(decoded_footer.version(), 2U);
I
xxHash  
Igor Canadi 已提交
3395 3396 3397
  }
}

3398
class IndexBlockRestartIntervalTest
3399
    : public TableTest,
3400
      public ::testing::WithParamInterface<std::pair<int, bool>> {
3401
 public:
3402 3403 3404 3405 3406
  static std::vector<std::pair<int, bool>> GetRestartValues() {
    return {{-1, false}, {0, false},  {1, false}, {8, false},
            {16, false}, {32, false}, {-1, true}, {0, true},
            {1, true},   {8, true},   {16, true}, {32, true}};
  }
3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417
};

// Instantiate the IndexBlockRestartIntervalTest cases once for each parameter
// pair returned by IndexBlockRestartIntervalTest::GetRestartValues().
INSTANTIATE_TEST_CASE_P(
    IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest,
    ::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues()));

// Writes 10000 random key/value pairs into a block-based table whose data
// blocks are tiny (so the index block becomes large), using the parameterized
// index_block_restart_interval, and checks that point lookups and a full
// forward scan both return exactly the stored entries.
TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) {
  const int kKeysInTable = 10000;
  const int kKeySize = 100;
  const int kValSize = 500;

  const std::pair<int, bool> param = GetParam();
  const int index_block_restart_interval = param.first;
  const bool value_delta_encoding = param.second;

  BlockBasedTableOptions table_options;
  table_options.block_size = 64;  // small block size to get big index block
  table_options.index_block_restart_interval = index_block_restart_interval;
  if (value_delta_encoding) {
    // The boolean parameter selects format_version 4 for the table.
    table_options.format_version = 4;
  }
  Options options;
  options.table_factory.reset(new BlockBasedTableFactory(table_options));

  TableConstructor c(BytewiseComparator());
  static Random rnd(301);
  for (int n = 0; n < kKeysInTable; ++n) {
    InternalKey ikey(RandomString(&rnd, kKeySize), 0, kTypeValue);
    c.Add(ikey.Encode().ToString(), RandomString(&rnd, kValSize));
  }

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const InternalKeyComparator internal_comparator(BytewiseComparator());
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);
  auto* reader = c.GetTableReader();

  std::unique_ptr<InternalIterator> table_iter(reader->NewIterator(
      ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // Point lookups: seeking to each stored key must land exactly on it.
  for (const auto& expected : kvmap) {
    table_iter->Seek(expected.first);

    ASSERT_TRUE(table_iter->Valid());
    ASSERT_OK(table_iter->status());
    ASSERT_EQ(table_iter->key(), expected.first);
    ASSERT_EQ(table_iter->value(), expected.second);
  }

  // Full scan: the table must yield the same entries, in the same order, as
  // the expected map, and nothing may be missing at the end.
  auto expected_it = kvmap.begin();
  table_iter->SeekToFirst();
  while (table_iter->Valid()) {
    ASSERT_EQ(table_iter->key(), expected_it->first);
    ASSERT_EQ(table_iter->value(), expected_it->second);
    ++expected_it;
    table_iter->Next();
  }
  ASSERT_EQ(expected_it, kvmap.end());
  c.ResetTableReader();
}

3472 3473 3474
class PrefixTest : public testing::Test {
 public:
  PrefixTest() : testing::Test() {}
3475
  ~PrefixTest() override {}
3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494
};

namespace {
// A simple PrefixExtractor that only works for test PrefixAndWholeKeyTest
class TestPrefixExtractor : public rocksdb::SliceTransform {
 public:
  ~TestPrefixExtractor() override{};
  const char* Name() const override { return "TestPrefixExtractor"; }

  rocksdb::Slice Transform(const rocksdb::Slice& src) const override {
    assert(IsValid(src));
    return rocksdb::Slice(src.data(), 3);
  }

  bool InDomain(const rocksdb::Slice& src) const override {
    assert(IsValid(src));
    return true;
  }

A
Andrew Kryczka 已提交
3495
  bool InRange(const rocksdb::Slice& /*dst*/) const override { return true; }
3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530

  bool IsValid(const rocksdb::Slice& src) const {
    if (src.size() != 4) {
      return false;
    }
    if (src[0] != '[') {
      return false;
    }
    if (src[1] < '0' || src[1] > '9') {
      return false;
    }
    if (src[2] != ']') {
      return false;
    }
    if (src[3] < '0' || src[3] > '9') {
      return false;
    }
    return true;
  }
};
}  // namespace

// Opens a DB configured with both a prefix extractor and whole-key bloom
// filtering, writes 100 keys of the form "[i]j" (10 prefixes x 10 suffixes),
// and then runs a full-range compaction. Every write and the compaction are
// required to succeed.
TEST_F(PrefixTest, PrefixAndWholeKeyTest) {
  rocksdb::Options options;
  options.compaction_style = rocksdb::kCompactionStyleUniversal;
  options.num_levels = 20;
  options.create_if_missing = true;
  options.optimize_filters_for_hits = false;
  options.target_file_size_base = 268435456;
  options.prefix_extractor = std::make_shared<TestPrefixExtractor>();
  rocksdb::BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10));
  bbto.block_size = 262144;
  bbto.whole_key_filtering = true;

  const std::string kDBPath = test::PerThreadDBPath("table_prefix_test");
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  // Best-effort cleanup of a previous run; the result is intentionally not
  // checked.
  DestroyDB(kDBPath, options);
  rocksdb::DB* raw_db = nullptr;
  ASSERT_OK(rocksdb::DB::Open(options, kDBPath, &raw_db));
  // Own the handle so the DB is closed even when an assertion below fails.
  std::unique_ptr<rocksdb::DB> db(raw_db);

  // Create a bunch of keys with 10 filters.
  for (int i = 0; i < 10; i++) {
    const std::string prefix = "[" + std::to_string(i) + "]";
    for (int j = 0; j < 10; j++) {
      const std::string key = prefix + std::to_string(j);
      ASSERT_OK(db->Put(rocksdb::WriteOptions(), key, "1"));
    }
  }

  // Trigger compaction.
  ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  // NOTE: only this single round (whole_key_filtering on) is exercised; no
  // second round with whole_key_filtering off is implemented here.
}

3553 3554 3555 3556 3557 3558 3559 3560
/*
 * Disable TableWithGlobalSeqno since RocksDB does not store global_seqno in
 * the SST file any more. Instead, RocksDB deduces global_seqno from the
 * MANIFEST while reading from an SST. Therefore, it's not possible to test the
 * functionality of global_seqno in a single, isolated unit test without the
 * involvement of Version, VersionSet, etc.
 *
 * The test body builds a table of 26 keys ("aaaaaaaa" .. "zzzzzzzz") with
 * sequence number 0, then overwrites the global seqno property in place and
 * checks that iterators report the updated sequence number.
 */
TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) {
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
  test::StringSink* sink = new test::StringSink();
  std::unique_ptr<WritableFileWriter> file_writer(
      test::GetWritableFileWriter(sink, "" /* don't care */));
  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  int_tbl_prop_collector_factories.emplace_back(
      new SstFileWriterPropertiesCollectorFactory(2 /* version */,
                                                  0 /* global_seqno*/));
  std::string column_family_name;
  std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, ikc,
                          &int_tbl_prop_collector_factories, kNoCompression,
                          0 /* sample_for_compression */, CompressionOptions(),
                          false /* skip_filters */, column_family_name, -1),
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      file_writer.get()));

  for (char c = 'a'; c <= 'z'; ++c) {
    std::string key(8, c);
    std::string value = key;
    InternalKey ik(key, 0, kTypeValue);

    builder->Add(ik.Encode(), value);
  }
  ASSERT_OK(builder->Finish());
  ASSERT_OK(file_writer->Flush());

  test::RandomRWStringSink ss_rw(sink);
  // Zero-initialized so that a failed assertion inside the helper lambda
  // (which only returns from the lambda) cannot lead to reading
  // indeterminate values below.
  uint32_t version = 0;
  uint64_t global_seqno = 0;
  uint64_t global_seqno_offset = 0;

  // Helper function to get version, global_seqno, global_seqno_offset
  std::function<void()> GetVersionAndGlobalSeqno = [&]() {
    std::unique_ptr<RandomAccessFileReader> file_reader(
        test::GetRandomAccessFileReader(
            new test::StringSource(ss_rw.contents(), 73342, true)));

    TableProperties* props = nullptr;
    ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
                                  kBlockBasedTableMagicNumber, ioptions,
                                  &props, true /* compression_type_missing */));

    UserCollectedProperties user_props = props->user_collected_properties;
    version = DecodeFixed32(
        user_props[ExternalSstFilePropertyNames::kVersion].c_str());
    global_seqno = DecodeFixed64(
        user_props[ExternalSstFilePropertyNames::kGlobalSeqno].c_str());
    global_seqno_offset =
        props->properties_offsets[ExternalSstFilePropertyNames::kGlobalSeqno];

    delete props;
  };

  // Helper function to update the value of the global seqno in the file
  std::function<void(uint64_t)> SetGlobalSeqno = [&](uint64_t val) {
    std::string new_global_seqno;
    PutFixed64(&new_global_seqno, val);

    ASSERT_OK(ss_rw.Write(global_seqno_offset, new_global_seqno));
  };

  // Helper function to get the contents of the table InternalIterator.
  // Each call replaces table_reader with a freshly opened reader and returns
  // a new iterator created from it; the caller owns the returned iterator.
  std::unique_ptr<TableReader> table_reader;
  std::function<InternalIterator*()> GetTableInternalIter = [&]() {
    std::unique_ptr<RandomAccessFileReader> file_reader(
        test::GetRandomAccessFileReader(
            new test::StringSource(ss_rw.contents(), 73342, true)));

    options.table_factory->NewTableReader(
        TableReaderOptions(ioptions, moptions.prefix_extractor.get(),
                           EnvOptions(), ikc),
        std::move(file_reader), ss_rw.contents().size(), &table_reader);

    return table_reader->NewIterator(
        ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized);
  };

  GetVersionAndGlobalSeqno();
  ASSERT_EQ(2, version);
  ASSERT_EQ(0, global_seqno);

  // Held in a unique_ptr so that a failing assertion cannot leak the
  // iterator. It is declared after table_reader and is therefore destroyed
  // before the reader at scope exit.
  std::unique_ptr<InternalIterator> iter(GetTableInternalIter());
  char current_c = 'a';
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ParsedInternalKey pik;
    ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 0);
    ASSERT_EQ(pik.user_key, iter->value());
    ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
    current_c++;
  }
  ASSERT_EQ(current_c, 'z' + 1);
  // Release the iterator before GetTableInternalIter() replaces table_reader.
  iter.reset();

  // Update global sequence number to 10
  SetGlobalSeqno(10);
  GetVersionAndGlobalSeqno();
  ASSERT_EQ(2, version);
  ASSERT_EQ(10, global_seqno);

  iter.reset(GetTableInternalIter());
  current_c = 'a';
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ParsedInternalKey pik;
    ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 10);
    ASSERT_EQ(pik.user_key, iter->value());
    ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
    current_c++;
  }
  ASSERT_EQ(current_c, 'z' + 1);

  // Verify Seek
  for (char c = 'a'; c <= 'z'; c++) {
    std::string k = std::string(8, c);
    InternalKey ik(k, 10, kValueTypeForSeek);
    iter->Seek(ik.Encode());
    ASSERT_TRUE(iter->Valid());

    ParsedInternalKey pik;
    ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 10);
    ASSERT_EQ(pik.user_key.ToString(), k);
    ASSERT_EQ(iter->value().ToString(), k);
  }
  // Release the iterator before GetTableInternalIter() replaces table_reader.
  iter.reset();

  // Update global sequence number to 3
  SetGlobalSeqno(3);
  GetVersionAndGlobalSeqno();
  ASSERT_EQ(2, version);
  ASSERT_EQ(3, global_seqno);

  iter.reset(GetTableInternalIter());
  current_c = 'a';
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ParsedInternalKey pik;
    ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 3);
    ASSERT_EQ(pik.user_key, iter->value());
    ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
    current_c++;
  }
  ASSERT_EQ(current_c, 'z' + 1);

  // Verify Seek
  for (char c = 'a'; c <= 'z'; c++) {
    std::string k = std::string(8, c);
    // Seek with seqno=4, which is larger than the stored seqno 3; we should
    // still get our key.
    InternalKey ik(k, 4, kValueTypeForSeek);
    iter->Seek(ik.Encode());
    ASSERT_TRUE(iter->Valid());

    ParsedInternalKey pik;
    ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 3);
    ASSERT_EQ(pik.user_key.ToString(), k);
    ASSERT_EQ(iter->value().ToString(), k);
  }
}

3742 3743
// Builds a table of 10000 small entries with block_align enabled, checks via
// the table properties that the data section is made of 4096-byte blocks, and
// then reads every key back through a reader created from a factory that has
// block_align disabled.
TEST_P(BlockBasedTableTest, BlockAlignTest) {
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
  bbto.block_align = true;
  test::StringSink* sink = new test::StringSink();
  std::unique_ptr<WritableFileWriter> file_writer(
      test::GetWritableFileWriter(sink, "" /* don't care */));
  Options options;
  options.compression = kNoCompression;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  std::string column_family_name;
  std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, ikc,
                          &int_tbl_prop_collector_factories, kNoCompression,
                          0 /* sample_for_compression */, CompressionOptions(),
                          false /* skip_filters */, column_family_name, -1),
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      file_writer.get()));

  for (int i = 1; i <= 10000; ++i) {
    std::ostringstream ostr;
    ostr << std::setfill('0') << std::setw(5) << i;
    std::string key = ostr.str();
    std::string value = "val";
    InternalKey ik(key, 0, kTypeValue);

    builder->Add(ik.Encode(), value);
  }
  ASSERT_OK(builder->Finish());
  ASSERT_OK(file_writer->Flush());

  test::RandomRWStringSink ss_rw(sink);
  std::unique_ptr<RandomAccessFileReader> file_reader(
      test::GetRandomAccessFileReader(
          new test::StringSource(ss_rw.contents(), 73342, true)));

  // Helper function that reads the table properties and checks that the data
  // size is an exact multiple of the data block count, with 4096 bytes per
  // data block.
  std::function<void()> VerifyBlockAlignment = [&]() {
    TableProperties* raw_props = nullptr;
    ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
                                  kBlockBasedTableMagicNumber, ioptions,
                                  &raw_props,
                                  true /* compression_type_missing */));
    // Take ownership so the properties are freed even if an assertion fails.
    std::unique_ptr<TableProperties> props(raw_props);

    uint64_t data_block_size = props->data_size / props->num_data_blocks;
    ASSERT_EQ(data_block_size, 4096);
    ASSERT_EQ(props->data_size, data_block_size * props->num_data_blocks);
  };

  VerifyBlockAlignment();

  // The below block of code verifies that we can read back the keys. Set
  // block_align to false when creating the reader to ensure we can flip between
  // the two modes without any issues
  std::unique_ptr<TableReader> table_reader;
  bbto.block_align = false;
  Options options2;
  options2.table_factory.reset(NewBlockBasedTableFactory(bbto));
  ImmutableCFOptions ioptions2(options2);
  const MutableCFOptions moptions2(options2);

  // Use the factory built with block_align == false (ioptions2), not the one
  // the table was written with; otherwise the reader would still be created
  // by the block_align == true factory.
  ASSERT_OK(ioptions2.table_factory->NewTableReader(
      TableReaderOptions(ioptions2, moptions2.prefix_extractor.get(),
                         EnvOptions(),
                         GetPlainInternalComparator(options2.comparator)),
      std::move(file_reader), ss_rw.contents().size(), &table_reader));

  std::unique_ptr<InternalIterator> db_iter(table_reader->NewIterator(
      ReadOptions(), moptions2.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // Keys were written as zero-padded 5-digit numbers 00001..10000, so a
  // forward scan must return them in exactly that order.
  int expected_key = 1;
  for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
    std::ostringstream ostr;
    ostr << std::setfill('0') << std::setw(5) << expected_key++;
    std::string key = ostr.str();
    std::string value = "val";

    ASSERT_OK(db_iter->status());
    ASSERT_EQ(ExtractUserKey(db_iter->key()).ToString(), key);
    ASSERT_EQ(db_iter->value().ToString(), value);
  }
  expected_key--;
  ASSERT_EQ(expected_key, 10000);
  table_reader.reset();
}

3833 3834 3835 3836
// Builds a table of 10000 small entries, then reads the footer, the metaindex
// block and the properties block by hand and checks that the properties block
// has exactly one restart point.
TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
  bbto.block_align = true;
  test::StringSink* sink = new test::StringSink();
  std::unique_ptr<WritableFileWriter> file_writer(
      test::GetWritableFileWriter(sink, "" /* don't care */));

  Options options;
  options.compression = kNoCompression;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));

  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  std::string column_family_name;

  std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, ikc,
                          &int_tbl_prop_collector_factories, kNoCompression,
                          0 /* sample_for_compression */, CompressionOptions(),
                          false /* skip_filters */, column_family_name, -1),
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      file_writer.get()));

  for (int i = 1; i <= 10000; ++i) {
    std::ostringstream ostr;
    ostr << std::setfill('0') << std::setw(5) << i;
    std::string key = ostr.str();
    std::string value = "val";
    InternalKey ik(key, 0, kTypeValue);

    builder->Add(ik.Encode(), value);
  }
  ASSERT_OK(builder->Finish());
  ASSERT_OK(file_writer->Flush());

  test::RandomRWStringSink ss_rw(sink);
  std::unique_ptr<RandomAccessFileReader> file_reader(
      test::GetRandomAccessFileReader(
          new test::StringSource(ss_rw.contents(), 73342, true)));

  {
    RandomAccessFileReader* file = file_reader.get();
    uint64_t file_size = ss_rw.contents().size();

    Footer footer;
    ASSERT_OK(ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
                                 &footer, kBlockBasedTableMagicNumber));

    // Reads the raw block at `handle` into `contents`, without checksum
    // verification and without decompression.
    auto BlockFetchHelper = [&](const BlockHandle& handle, BlockType block_type,
                                BlockContents* contents) {
      ReadOptions read_options;
      read_options.verify_checksums = false;
      PersistentCacheOptions cache_options;

      BlockFetcher block_fetcher(
          file, nullptr /* prefetch_buffer */, footer, read_options, handle,
          contents, ioptions, false /* decompress */,
          false /*maybe_compressed*/, block_type,
          UncompressionDict::GetEmptyDict(), cache_options);

      ASSERT_OK(block_fetcher.ReadBlockContents());
    };

    // -- Read metaindex block
    auto metaindex_handle = footer.metaindex_handle();
    BlockContents metaindex_contents;

    BlockFetchHelper(metaindex_handle, BlockType::kMetaIndex,
                     &metaindex_contents);
    Block metaindex_block(std::move(metaindex_contents),
                          kDisableGlobalSequenceNumber);

    std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
        BytewiseComparator(), BytewiseComparator()));
    // Start from `false` so the assertion below only passes when the seek
    // itself reports that the properties block was found.
    bool found_properties_block = false;
    ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block));
    ASSERT_TRUE(found_properties_block);

    // -- Read properties block
    Slice v = meta_iter->value();
    BlockHandle properties_handle;
    ASSERT_OK(properties_handle.DecodeFrom(&v));
    BlockContents properties_contents;

    BlockFetchHelper(properties_handle, BlockType::kProperties,
                     &properties_contents);
    Block properties_block(std::move(properties_contents),
                           kDisableGlobalSequenceNumber);

    ASSERT_EQ(properties_block.NumRestarts(), 1);
  }
}

3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979
TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
  // The properties meta-block should come at the end since we always need to
  // read it when opening a file, unlike index/filter/other meta-blocks, which
  // are sometimes read depending on the user's configuration. This ordering
  // allows us to do a small readahead on the end of the file to read properties
  // and meta-index blocks with one I/O.
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  const std::pair<const char*, const char*> kEntries[] = {
      {"a1", "val1"}, {"b2", "val2"}, {"c3", "val3"},
      {"d4", "val4"}, {"e5", "val5"}, {"f6", "val6"},
      {"g7", "val7"}, {"h8", "val8"}, {"j9", "val9"}};
  for (const auto& entry : kEntries) {
    c.Add(entry.first, entry.second);
  }

  // Write an SST file that also carries a bloom filter block.
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.filter_policy.reset(NewBloomFilterPolicy(
      8 /* bits_per_key */, false /* use_block_based_filter */));
  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  ImmutableCFOptions ioptions(options);
  MutableCFOptions moptions(options);
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);

  // Wrap the written bytes in a random-access file reader.
  test::StringSink* table_sink = c.TEST_GetSink();
  std::unique_ptr<RandomAccessFileReader> table_file{
      test::GetRandomAccessFileReader(
          new test::StringSource(table_sink->contents(), 0 /* unique_id */,
                                 false /* allow_mmap_reads */))};
  const size_t table_size = table_sink->contents().size();

  // Read the footer.
  Footer footer;
  ASSERT_OK(ReadFooterFromFile(table_file.get(),
                               nullptr /* prefetch_buffer */, table_size,
                               &footer, kBlockBasedTableMagicNumber));

  // Read the metaindex block that the footer points at.
  auto metaindex_handle = footer.metaindex_handle();
  BlockContents metaindex_contents;
  PersistentCacheOptions pcache_opts;
  BlockFetcher metaindex_fetcher(
      table_file.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(),
      metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
      false /*maybe_compressed*/, BlockType::kMetaIndex,
      UncompressionDict::GetEmptyDict(), pcache_opts,
      nullptr /*memory_allocator*/);
  ASSERT_OK(metaindex_fetcher.ReadBlockContents());
  Block metaindex_block(std::move(metaindex_contents),
                        kDisableGlobalSequenceNumber);

  // Verify the properties block comes last: among all metaindex entries, the
  // one with the largest block offset must be the properties block.
  std::unique_ptr<InternalIterator> metaindex_iter{
      metaindex_block.NewDataIterator(options.comparator, options.comparator)};
  uint64_t largest_offset = 0;
  std::string key_at_largest_offset;
  metaindex_iter->SeekToFirst();
  while (metaindex_iter->Valid()) {
    Slice encoded_handle = metaindex_iter->value();
    BlockHandle handle;
    ASSERT_OK(handle.DecodeFrom(&encoded_handle));
    if (largest_offset < handle.offset()) {
      largest_offset = handle.offset();
      key_at_largest_offset = metaindex_iter->key().ToString();
    }
    metaindex_iter->Next();
  }
  ASSERT_EQ(kPropertiesBlock, key_at_largest_offset);
  // index handle is stored in footer rather than metaindex block, so need
  // separate logic to verify it comes before properties block.
  ASSERT_GT(largest_offset, footer.index_handle().offset());
  c.ResetTableReader();
}

4009
// DB::Open must fail for two block_align configurations: a block size of 4000
// with compression off, and a block size of 4096 with Snappy compression on.
// NOTE(review): presumably 4000 is rejected for not being a power of two and
// the second case because block_align is combined with compression — confirm
// against the table factory's option validation.
TEST_P(BlockBasedTableTest, BadOptions) {
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
  bbto.block_size = 4000;
  bbto.block_align = true;

  rocksdb::Options options;
  options.compression = kNoCompression;

  const std::string kDBPath =
      test::PerThreadDBPath("block_based_table_bad_options_test");
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyDB(kDBPath, options);

  // First case: block_size 4000, no compression.
  rocksdb::DB* db;
  ASSERT_NOK(rocksdb::DB::Open(options, kDBPath, &db));

  // Second case: block_size 4096, Snappy compression.
  bbto.block_size = 4096;
  options.compression = kSnappyCompression;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  ASSERT_NOK(rocksdb::DB::Open(options, kDBPath, &db));
}

4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071
// Feeds a sequence of effective tail sizes into TailPrefetchStats and checks
// the suggested prefetch size after each batch.
TEST_F(BBTTailPrefetchTest, TestTailPrefetchStats) {
  TailPrefetchStats tpstats;
  // Records `len` as an effective size `times` times in a row.
  const auto record_repeated = [&tpstats](int times, size_t len) {
    for (int n = 0; n < times; ++n) {
      tpstats.RecordEffectiveSize(len);
    }
  };

  ASSERT_EQ(0, tpstats.GetSuggestedPrefetchSize());
  record_repeated(1, size_t{1000});
  record_repeated(1, size_t{1005});
  record_repeated(1, size_t{1002});
  ASSERT_EQ(1005, tpstats.GetSuggestedPrefetchSize());

  // One single super large value shouldn't influence much
  record_repeated(1, size_t{1002000});
  record_repeated(1, size_t{999});
  ASSERT_LE(1005, tpstats.GetSuggestedPrefetchSize());
  ASSERT_GT(1200, tpstats.GetSuggestedPrefetchSize());

  // Only history of 32 is kept
  record_repeated(32, size_t{100});
  ASSERT_EQ(100, tpstats.GetSuggestedPrefetchSize());

  // 16 large values and 16 small values. The suggested size should end up
  // closer to the small values.
  record_repeated(16, size_t{1000});
  record_repeated(1, size_t{10});
  record_repeated(1, size_t{20});
  record_repeated(6, size_t{100});
  ASSERT_LE(80, tpstats.GetSuggestedPrefetchSize());
  ASSERT_GT(200, tpstats.GetSuggestedPrefetchSize());
}

// After cache reads are attempted at offsets 500, 480 and 490, the buffer
// must report 480 as the minimum offset read.
TEST_F(BBTTailPrefetchTest, FilePrefetchBufferMinOffset) {
  FilePrefetchBuffer buffer(nullptr, 0, 0, false, true);
  buffer.TryReadFromCache(500, 10, nullptr);
  buffer.TryReadFromCache(480, 10, nullptr);
  buffer.TryReadFromCache(490, 10, nullptr);
  ASSERT_EQ(480, buffer.min_offset_read());
}

4072 4073 4074 4075 4076 4077 4078
// Builds a table with the kDataBlockBinaryAndHash data block index and checks
// lookups of both existent and non-existent keys, first through an iterator
// Seek() and then through TableReader::Get().
TEST_P(BlockBasedTableTest, DataBlockHashIndex) {
  const int kNumKeys = 500;
  const int kKeySize = 8;
  const int kValSize = 40;

  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.data_block_index_type =
      BlockBasedTableOptions::kDataBlockBinaryAndHash;

  Options options;
  options.comparator = BytewiseComparator();
  options.table_factory.reset(new BlockBasedTableFactory(table_options));

  TableConstructor c(options.comparator);

  static Random rnd(1048);
  for (int n = 0; n < kNumKeys; ++n) {
    // Existent keys end in "1"; the non-existent probes below end in "0".
    std::string random_key(RandomString(&rnd, kKeySize - 1) + "1");
    InternalKey ikey(random_key, 0, kTypeValue);
    c.Add(ikey.Encode().ToString(), RandomString(&rnd, kValSize));
  }

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  const InternalKeyComparator internal_comparator(options.comparator);
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);

  auto reader = c.GetTableReader();

  std::unique_ptr<InternalIterator> seek_iter(reader->NewIterator(
      ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // For every kv, we look it up using two methods: Seek() and Get().
  // Get() will use the SuffixIndexHash in Block. For non-existent key it
  //      will invalidate the iterator
  // Seek() will use the default BinarySeek() in Block. So for non-existent
  //      key it will land at the closest key that is larger than target.
  // Round 0 uses Seek(), round 1 uses Get().
  for (int round = 0; round < 2; ++round) {
    const bool use_seek = (round == 0);
    ReadOptions ro;

    // Search for existent keys
    for (auto& kv : kvmap) {
      if (use_seek) {
        seek_iter->Seek(kv.first);
        ASSERT_OK(seek_iter->status());
        ASSERT_TRUE(seek_iter->Valid());
        ASSERT_EQ(seek_iter->key(), kv.first);
        ASSERT_EQ(seek_iter->value(), kv.second);
      } else {
        PinnableSlice value;
        std::string user_key = ExtractUserKey(kv.first).ToString();
        GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                               GetContext::kNotFound, user_key, &value, nullptr,
                               nullptr, nullptr, nullptr);
        ASSERT_OK(reader->Get(ro, kv.first, &get_context,
                              moptions.prefix_extractor.get()));
        ASSERT_EQ(get_context.State(), GetContext::kFound);
        ASSERT_EQ(value, Slice(kv.second));
        value.Reset();
      }
    }

    // Search for non-existent keys
    for (auto& kv : kvmap) {
      std::string user_key = ExtractUserKey(kv.first).ToString();
      user_key.back() = '0';  // make it non-existent key
      InternalKey internal_key(user_key, 0, kTypeValue);
      std::string encoded_key = internal_key.Encode().ToString();
      if (use_seek) {
        seek_iter->Seek(encoded_key);
        ASSERT_OK(seek_iter->status());
        if (seek_iter->Valid()) {
          // Whatever Seek() landed on must sort strictly after the probe.
          ASSERT_TRUE(BytewiseComparator()->Compare(
                          user_key, ExtractUserKey(seek_iter->key())) < 0);
        }
      } else {
        PinnableSlice value;
        GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                               GetContext::kNotFound, user_key, &value, nullptr,
                               nullptr, nullptr, nullptr);
        ASSERT_OK(reader->Get(ro, encoded_key, &get_context,
                              moptions.prefix_extractor.get()));
        ASSERT_EQ(get_context.State(), GetContext::kNotFound);
        value.Reset();
      }
    }
  }
}

4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189
// BlockBasedTableIterator should invalidate itself and return
// IsOutOfBound()=true immediately after Seek(), to allow LevelIterator
// filter out corresponding level.
//
// Scenario: the table contains the single key "foo" and the iterator is
// given the upper bound "bar", which sorts before "foo" under the bytewise
// comparator. Both SeekToFirst() and Seek("foo") must therefore leave the
// iterator invalid and flagged as out-of-bound.
TEST_P(BlockBasedTableTest, OutOfBoundOnSeek) {
  TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
  c.Add("foo", "v1");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  BlockBasedTableOptions table_opt(GetBlockBasedTableOptions());
  options.table_factory.reset(NewBlockBasedTableFactory(table_opt));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_opt,
           GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
  auto* reader = c.GetTableReader();

  // Every iterator created below is bounded above by "bar".
  ReadOptions read_opt;
  std::string upper_bound = "bar";
  Slice upper_bound_slice(upper_bound);
  read_opt.iterate_upper_bound = &upper_bound_slice;

  // Builds a fresh table iterator wrapped in a KeyConvertingIterator, so each
  // positioning call is checked on an iterator with no prior state.
  auto new_iter = [&]() {
    return std::unique_ptr<InternalIterator>(
        new KeyConvertingIterator(reader->NewIterator(
            read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
            /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
  };

  // Case 1: positioning at the first key.
  std::unique_ptr<InternalIterator> iter = new_iter();
  iter->SeekToFirst();
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->IsOutOfBound());

  // Case 2: seeking directly to the stored key.
  iter = new_iter();
  iter->Seek("foo");
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->IsOutOfBound());
}

// BlockBasedTableIterator should invalidate itself and return
// IsOutOfBound()=true after Next(), if it finds current index key is no
// smaller than upper bound, unless it is pointing to the last data block.
//
// The table holds "bar" and "foo" and is built with
// FlushBlockEveryKeyPolicyFactory (presumably one key per data block, going
// by the policy's name). Two scenarios are checked:
//   1. upper bound "bar_after": Next() from "bar" must become invalid AND
//      report out-of-bound.
//   2. upper bound "foo_after": Next() from "foo" (the last key) must become
//      invalid WITHOUT reporting out-of-bound.
TEST_P(BlockBasedTableTest, OutOfBoundOnNext) {
  TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
  c.Add("bar", "v");
  c.Add("foo", "v");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  BlockBasedTableOptions table_opt(GetBlockBasedTableOptions());
  table_opt.flush_block_policy_factory =
      std::make_shared<FlushBlockEveryKeyPolicyFactory>();
  options.table_factory.reset(NewBlockBasedTableFactory(table_opt));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_opt,
           GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
  auto* reader = c.GetTableReader();
  ReadOptions read_opt;

  // Builds a fresh table iterator using whatever upper bound read_opt holds
  // at the time of the call (read_opt is captured by reference).
  auto new_iter = [&]() {
    return std::unique_ptr<InternalIterator>(
        new KeyConvertingIterator(reader->NewIterator(
            read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
            /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
  };

  // Scenario 1: the bound falls between the two keys.
  std::string ub1 = "bar_after";
  Slice ub_slice1(ub1);
  read_opt.iterate_upper_bound = &ub_slice1;
  std::unique_ptr<InternalIterator> iter = new_iter();
  iter->Seek("bar");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bar", iter->key());
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->IsOutOfBound());

  // Scenario 2: the bound lies past the last key, so running off the end of
  // the table must not be reported as hitting the upper bound.
  std::string ub2 = "foo_after";
  Slice ub_slice2(ub2);
  read_opt.iterate_upper_bound = &ub_slice2;
  iter = new_iter();
  iter->Seek("foo");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("foo", iter->key());
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  ASSERT_FALSE(iter->IsOutOfBound());
}

}  // namespace rocksdb

int main(int argc, char** argv) {
I
Igor Sugak 已提交
4254 4255
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
J
jorlow@chromium.org 已提交
4256
}