table_test.cc 173.5 KB
Newer Older
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
S
Siying Dong 已提交
2 3 4
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
5
//
J
jorlow@chromium.org 已提交
6 7 8
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
9 10

#include <stdio.h>
K
Kai Liu 已提交
11
#include <algorithm>
12
#include <iostream>
J
jorlow@chromium.org 已提交
13
#include <map>
J
Jim Paton 已提交
14
#include <memory>
15
#include <string>
K
Kai Liu 已提交
16 17
#include <vector>

18
#include "block_fetcher.h"
M
Maysam Yabandeh 已提交
19
#include "cache/lru_cache.h"
J
jorlow@chromium.org 已提交
20 21 22
#include "db/dbformat.h"
#include "db/memtable.h"
#include "db/write_batch_internal.h"
23
#include "memtable/stl_wrappers.h"
24
#include "meta_blocks.h"
25
#include "monitoring/statistics.h"
D
Dmitri Smirnov 已提交
26
#include "port/port.h"
K
Kai Liu 已提交
27
#include "rocksdb/cache.h"
28 29
#include "rocksdb/db.h"
#include "rocksdb/env.h"
30
#include "rocksdb/file_checksum.h"
31
#include "rocksdb/file_system.h"
32 33
#include "rocksdb/iterator.h"
#include "rocksdb/memtablerep.h"
I
Igor Canadi 已提交
34
#include "rocksdb/perf_context.h"
35 36
#include "rocksdb/slice_transform.h"
#include "rocksdb/statistics.h"
37
#include "rocksdb/write_buffer_manager.h"
38 39 40 41 42 43
#include "table/block_based/block.h"
#include "table/block_based/block_based_table_builder.h"
#include "table/block_based/block_based_table_factory.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/block_builder.h"
#include "table/block_based/flush_block_policy.h"
J
jorlow@chromium.org 已提交
44
#include "table/format.h"
45
#include "table/get_context.h"
S
sdong 已提交
46
#include "table/internal_iterator.h"
47
#include "table/plain/plain_table_factory.h"
S
sdong 已提交
48
#include "table/scoped_arena_iterator.h"
49
#include "table/sst_file_writer_collectors.h"
50 51 52
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
53
#include "util/compression.h"
54
#include "util/file_checksum_helper.h"
55 56
#include "util/random.h"
#include "util/string_util.h"
57
#include "utilities/merge_operators.h"
58

59
namespace ROCKSDB_NAMESPACE {
J
jorlow@chromium.org 已提交
60

I
xxHash  
Igor Canadi 已提交
61 62 63 64 65
extern const uint64_t kLegacyBlockBasedTableMagicNumber;
extern const uint64_t kLegacyPlainTableMagicNumber;
extern const uint64_t kBlockBasedTableMagicNumber;
extern const uint64_t kPlainTableMagicNumber;

66
namespace {
K
Kai Liu 已提交
67

68 69
const std::string kDummyValue(10000, 'o');

70 71 72
// DummyPropertiesCollector used to test BlockBasedTableProperties
class DummyPropertiesCollector : public TablePropertiesCollector {
 public:
73
  const char* Name() const override { return ""; }
74

75
  Status Finish(UserCollectedProperties* /*properties*/) override {
A
Andrew Kryczka 已提交
76 77
    return Status::OK();
  }
78

79
  Status Add(const Slice& /*user_key*/, const Slice& /*value*/) override {
A
Andrew Kryczka 已提交
80 81
    return Status::OK();
  }
82

83
  UserCollectedProperties GetReadableProperties() const override {
84 85 86 87 88 89 90
    return UserCollectedProperties{};
  }
};

class DummyPropertiesCollectorFactory1
    : public TablePropertiesCollectorFactory {
 public:
91 92
  TablePropertiesCollector* CreateTablePropertiesCollector(
      TablePropertiesCollectorFactory::Context /*context*/) override {
93 94
    return new DummyPropertiesCollector();
  }
95
  const char* Name() const override { return "DummyPropertiesCollector1"; }
96 97 98 99 100
};

class DummyPropertiesCollectorFactory2
    : public TablePropertiesCollectorFactory {
 public:
101 102
  TablePropertiesCollector* CreateTablePropertiesCollector(
      TablePropertiesCollectorFactory::Context /*context*/) override {
103 104
    return new DummyPropertiesCollector();
  }
105
  const char* Name() const override { return "DummyPropertiesCollector2"; }
106 107
};

J
jorlow@chromium.org 已提交
108 109
// Return reverse of "key".
// Used to test non-lexicographic comparators.
K
Kai Liu 已提交
110 111 112
std::string Reverse(const Slice& key) {
  auto rev = key.ToString();
  std::reverse(rev.begin(), rev.end());
J
jorlow@chromium.org 已提交
113 114 115 116 117
  return rev;
}

class ReverseKeyComparator : public Comparator {
 public:
118
  const char* Name() const override {
119
    return "rocksdb.ReverseBytewiseComparator";
J
jorlow@chromium.org 已提交
120 121
  }

122
  int Compare(const Slice& a, const Slice& b) const override {
J
jorlow@chromium.org 已提交
123 124 125
    return BytewiseComparator()->Compare(Reverse(a), Reverse(b));
  }

126 127
  void FindShortestSeparator(std::string* start,
                             const Slice& limit) const override {
J
jorlow@chromium.org 已提交
128 129 130 131 132 133
    std::string s = Reverse(*start);
    std::string l = Reverse(limit);
    BytewiseComparator()->FindShortestSeparator(&s, l);
    *start = Reverse(s);
  }

134
  void FindShortSuccessor(std::string* key) const override {
J
jorlow@chromium.org 已提交
135 136 137 138 139 140
    std::string s = Reverse(*key);
    BytewiseComparator()->FindShortSuccessor(&s);
    *key = Reverse(s);
  }
};

K
Kai Liu 已提交
141 142 143
ReverseKeyComparator reverse_key_comparator;

void Increment(const Comparator* cmp, std::string* key) {
J
jorlow@chromium.org 已提交
144 145 146 147 148 149 150 151 152 153
  if (cmp == BytewiseComparator()) {
    key->push_back('\0');
  } else {
    assert(cmp == &reverse_key_comparator);
    std::string rev = Reverse(*key);
    rev.push_back('\0');
    *key = Reverse(rev);
  }
}

H
Hans Wennborg 已提交
154
}  // namespace
J
jorlow@chromium.org 已提交
155 156 157 158 159

// Helper class for tests to unify the interface between
// BlockBuilder/TableBuilder and Block/Table.
class Constructor {
 public:
160 161
  explicit Constructor(const Comparator* cmp)
      : data_(stl_wrappers::LessOfComparator(cmp)) {}
J
jorlow@chromium.org 已提交
162 163 164 165 166 167 168 169 170
  virtual ~Constructor() { }

  void Add(const std::string& key, const Slice& value) {
    data_[key] = value.ToString();
  }

  // Finish constructing the data structure with all the keys that have
  // been added so far.  Returns the keys in sorted order in "*keys"
  // and stores the key/value pairs in "*kvmap"
171
  void Finish(const Options& options, const ImmutableCFOptions& ioptions,
172
              const MutableCFOptions& moptions,
173
              const BlockBasedTableOptions& table_options,
174
              const InternalKeyComparator& internal_comparator,
175
              std::vector<std::string>* keys, stl_wrappers::KVMap* kvmap) {
176
    last_internal_key_ = &internal_comparator;
J
jorlow@chromium.org 已提交
177 178
    *kvmap = data_;
    keys->clear();
179 180
    for (const auto& kv : data_) {
      keys->push_back(kv.first);
J
jorlow@chromium.org 已提交
181 182
    }
    data_.clear();
183
    Status s = FinishImpl(options, ioptions, moptions, table_options,
L
Lei Jin 已提交
184
                          internal_comparator, *kvmap);
J
jorlow@chromium.org 已提交
185 186 187 188
    ASSERT_TRUE(s.ok()) << s.ToString();
  }

  // Construct the data structure from the data in "data"
189
  virtual Status FinishImpl(const Options& options,
L
Lei Jin 已提交
190
                            const ImmutableCFOptions& ioptions,
191
                            const MutableCFOptions& moptions,
192
                            const BlockBasedTableOptions& table_options,
193
                            const InternalKeyComparator& internal_comparator,
194
                            const stl_wrappers::KVMap& data) = 0;
J
jorlow@chromium.org 已提交
195

196 197
  virtual InternalIterator* NewIterator(
      const SliceTransform* prefix_extractor = nullptr) const = 0;
J
jorlow@chromium.org 已提交
198

199
  virtual const stl_wrappers::KVMap& data() { return data_; }
J
jorlow@chromium.org 已提交
200

201 202
  virtual bool IsArenaMode() const { return false; }

A
Abhishek Kona 已提交
203
  virtual DB* db() const { return nullptr; }  // Overridden in DBConstructor
J
jorlow@chromium.org 已提交
204

205 206
  virtual bool AnywayDeleteIterator() const { return false; }

207 208 209
 protected:
  const InternalKeyComparator* last_internal_key_;

J
jorlow@chromium.org 已提交
210
 private:
211
  stl_wrappers::KVMap data_;
J
jorlow@chromium.org 已提交
212 213 214 215 216 217 218
};

class BlockConstructor: public Constructor {
 public:
  explicit BlockConstructor(const Comparator* cmp)
      : Constructor(cmp),
        comparator_(cmp),
A
Abhishek Kona 已提交
219
        block_(nullptr) { }
220 221 222 223 224 225 226
  ~BlockConstructor() override { delete block_; }
  Status FinishImpl(const Options& /*options*/,
                    const ImmutableCFOptions& /*ioptions*/,
                    const MutableCFOptions& /*moptions*/,
                    const BlockBasedTableOptions& table_options,
                    const InternalKeyComparator& /*internal_comparator*/,
                    const stl_wrappers::KVMap& kv_map) override {
J
jorlow@chromium.org 已提交
227
    delete block_;
A
Abhishek Kona 已提交
228
    block_ = nullptr;
I
Igor Canadi 已提交
229
    BlockBuilder builder(table_options.block_restart_interval);
J
jorlow@chromium.org 已提交
230

I
Igor Canadi 已提交
231 232
    for (const auto kv : kv_map) {
      builder.Add(kv.first, kv.second);
J
jorlow@chromium.org 已提交
233 234
    }
    // Open the block
S
Sanjay Ghemawat 已提交
235 236 237
    data_ = builder.Finish().ToString();
    BlockContents contents;
    contents.data = data_;
238
    block_ = new Block(std::move(contents));
J
jorlow@chromium.org 已提交
239 240
    return Status::OK();
  }
241
  InternalIterator* NewIterator(
242
      const SliceTransform* /*prefix_extractor*/) const override {
243 244
    return block_->NewDataIterator(comparator_, comparator_,
                                   kDisableGlobalSequenceNumber);
J
jorlow@chromium.org 已提交
245 246 247 248
  }

 private:
  const Comparator* comparator_;
S
Sanjay Ghemawat 已提交
249
  std::string data_;
J
jorlow@chromium.org 已提交
250 251 252 253 254
  Block* block_;

  BlockConstructor();
};

255
// A helper class that converts internal format keys into user keys
S
sdong 已提交
256
class KeyConvertingIterator : public InternalIterator {
J
jorlow@chromium.org 已提交
257
 public:
S
sdong 已提交
258 259
  explicit KeyConvertingIterator(InternalIterator* iter,
                                 bool arena_mode = false)
260
      : iter_(iter), arena_mode_(arena_mode) {}
261
  ~KeyConvertingIterator() override {
262
    if (arena_mode_) {
S
sdong 已提交
263
      iter_->~InternalIterator();
264 265 266 267
    } else {
      delete iter_;
    }
  }
268 269
  bool Valid() const override { return iter_->Valid() && status_.ok(); }
  void Seek(const Slice& target) override {
270 271 272 273 274
    ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
    std::string encoded;
    AppendInternalKey(&encoded, ikey);
    iter_->Seek(encoded);
  }
275
  void SeekForPrev(const Slice& target) override {
A
Aaron Gao 已提交
276 277 278 279 280
    ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
    std::string encoded;
    AppendInternalKey(&encoded, ikey);
    iter_->SeekForPrev(encoded);
  }
281 282 283 284
  void SeekToFirst() override { iter_->SeekToFirst(); }
  void SeekToLast() override { iter_->SeekToLast(); }
  void Next() override { iter_->Next(); }
  void Prev() override { iter_->Prev(); }
285
  bool IsOutOfBound() override { return iter_->IsOutOfBound(); }
286

287
  Slice key() const override {
288
    assert(Valid());
I
Igor Canadi 已提交
289 290
    ParsedInternalKey parsed_key;
    if (!ParseInternalKey(iter_->key(), &parsed_key)) {
291 292 293
      status_ = Status::Corruption("malformed internal key");
      return Slice("corrupted key");
    }
I
Igor Canadi 已提交
294
    return parsed_key.user_key;
J
jorlow@chromium.org 已提交
295
  }
296

297 298
  Slice value() const override { return iter_->value(); }
  Status status() const override {
299 300 301 302 303
    return status_.ok() ? iter_->status() : status_;
  }

 private:
  mutable Status status_;
S
sdong 已提交
304
  InternalIterator* iter_;
305
  bool arena_mode_;
306 307 308 309 310 311 312 313

  // No copying allowed
  KeyConvertingIterator(const KeyConvertingIterator&);
  void operator=(const KeyConvertingIterator&);
};

class TableConstructor: public Constructor {
 public:
K
kailiu 已提交
314
  explicit TableConstructor(const Comparator* cmp,
315
                            bool convert_to_internal_key = false,
316
                            int level = -1, SequenceNumber largest_seqno = 0)
317
      : Constructor(cmp),
318
        largest_seqno_(largest_seqno),
319
        convert_to_internal_key_(convert_to_internal_key),
320
        level_(level) {
321
    env_ = ROCKSDB_NAMESPACE::Env::Default();
322
  }
323
  ~TableConstructor() override { Reset(); }
324

325 326 327 328 329
  Status FinishImpl(const Options& options, const ImmutableCFOptions& ioptions,
                    const MutableCFOptions& moptions,
                    const BlockBasedTableOptions& /*table_options*/,
                    const InternalKeyComparator& internal_comparator,
                    const stl_wrappers::KVMap& kv_map) override {
J
jorlow@chromium.org 已提交
330
    Reset();
331
    soptions.use_mmap_reads = ioptions.allow_mmap_reads;
332 333
    file_writer_.reset(test::GetWritableFileWriter(new test::StringSink(),
                                                   "" /* don't care */));
334
    std::unique_ptr<TableBuilder> builder;
335 336
    std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
        int_tbl_prop_collector_factories;
337 338 339 340 341 342 343 344

    if (largest_seqno_ != 0) {
      // Pretend that it's an external file written by SstFileWriter.
      int_tbl_prop_collector_factories.emplace_back(
          new SstFileWriterPropertiesCollectorFactory(2 /* version */,
                                                      0 /* global_seqno*/));
    }

345
    std::string column_family_name;
L
Lei Jin 已提交
346
    builder.reset(ioptions.table_factory->NewTableBuilder(
347 348
        TableBuilderOptions(ioptions, moptions, internal_comparator,
                            &int_tbl_prop_collector_factories,
349
                            options.compression, options.sample_for_compression,
350
                            options.compression_opts, false /* skip_filters */,
351
                            column_family_name, level_),
352
        TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
353
        file_writer_.get()));
J
jorlow@chromium.org 已提交
354

I
Igor Canadi 已提交
355
    for (const auto kv : kv_map) {
356
      if (convert_to_internal_key_) {
I
Igor Canadi 已提交
357
        ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
358 359
        std::string encoded;
        AppendInternalKey(&encoded, ikey);
I
Igor Canadi 已提交
360
        builder->Add(encoded, kv.second);
361
      } else {
I
Igor Canadi 已提交
362
        builder->Add(kv.first, kv.second);
363
      }
364
      EXPECT_TRUE(builder->status().ok());
J
jorlow@chromium.org 已提交
365
    }
366
    Status s = builder->Finish();
367
    file_writer_->Flush();
368
    EXPECT_TRUE(s.ok()) << s.ToString();
J
jorlow@chromium.org 已提交
369

370
    EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize());
J
jorlow@chromium.org 已提交
371 372

    // Open the table
373
    uniq_id_ = cur_uniq_id_++;
A
Andres Notzli 已提交
374
    file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource(
375
        TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
376 377
    const bool kSkipFilters = true;
    const bool kImmortal = true;
L
Lei Jin 已提交
378
    return ioptions.table_factory->NewTableReader(
379
        TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
380
                           internal_comparator, !kSkipFilters, !kImmortal,
381 382
                           false, level_, largest_seqno_, &block_cache_tracer_,
                           moptions.write_buffer_size),
383 384
        std::move(file_reader_), TEST_GetSink()->contents().size(),
        &table_reader_);
J
jorlow@chromium.org 已提交
385 386
  }

387
  InternalIterator* NewIterator(
388
      const SliceTransform* prefix_extractor) const override {
389
    ReadOptions ro;
390 391 392
    InternalIterator* iter = table_reader_->NewIterator(
        ro, prefix_extractor, /*arena=*/nullptr, /*skip_filters=*/false,
        TableReaderCaller::kUncategorized);
393 394 395 396 397
    if (convert_to_internal_key_) {
      return new KeyConvertingIterator(iter);
    } else {
      return iter;
    }
J
jorlow@chromium.org 已提交
398 399 400
  }

  uint64_t ApproximateOffsetOf(const Slice& key) const {
401 402 403
    if (convert_to_internal_key_) {
      InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
      const Slice skey = ikey.Encode();
404 405
      return table_reader_->ApproximateOffsetOf(
          skey, TableReaderCaller::kUncategorized);
406
    }
407 408
    return table_reader_->ApproximateOffsetOf(
        key, TableReaderCaller::kUncategorized);
J
jorlow@chromium.org 已提交
409 410
  }

411 412
  virtual Status Reopen(const ImmutableCFOptions& ioptions,
                        const MutableCFOptions& moptions) {
A
Andres Notzli 已提交
413
    file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource(
414
        TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
L
Lei Jin 已提交
415
    return ioptions.table_factory->NewTableReader(
416 417
        TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
                           *last_internal_key_),
418 419
        std::move(file_reader_), TEST_GetSink()->contents().size(),
        &table_reader_);
420 421
  }

422
  virtual TableReader* GetTableReader() { return table_reader_.get(); }
423

424
  bool AnywayDeleteIterator() const override {
425 426 427
    return convert_to_internal_key_;
  }

428 429
  void ResetTableReader() { table_reader_.reset(); }

430 431
  bool ConvertToInternalKey() { return convert_to_internal_key_; }

432
  test::StringSink* TEST_GetSink() {
433 434
    return ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter(
        file_writer_.get());
435 436
  }

437 438
  BlockCacheTracer block_cache_tracer_;

J
jorlow@chromium.org 已提交
439 440
 private:
  void Reset() {
441
    uniq_id_ = 0;
S
Siying Dong 已提交
442
    table_reader_.reset();
443 444 445 446
    file_writer_.reset();
    file_reader_.reset();
  }

447
  uint64_t uniq_id_;
448 449 450
  std::unique_ptr<WritableFileWriter> file_writer_;
  std::unique_ptr<RandomAccessFileReader> file_reader_;
  std::unique_ptr<TableReader> table_reader_;
451
  SequenceNumber largest_seqno_;
452
  bool convert_to_internal_key_;
453
  int level_;
J
jorlow@chromium.org 已提交
454

455
  TableConstructor();
456 457

  static uint64_t cur_uniq_id_;
458
  EnvOptions soptions;
459
  Env* env_;
J
jorlow@chromium.org 已提交
460
};
461
uint64_t TableConstructor::cur_uniq_id_ = 1;
J
jorlow@chromium.org 已提交
462 463 464

class MemTableConstructor: public Constructor {
 public:
465
  explicit MemTableConstructor(const Comparator* cmp, WriteBufferManager* wb)
J
jorlow@chromium.org 已提交
466
      : Constructor(cmp),
J
Jim Paton 已提交
467
        internal_comparator_(cmp),
468
        write_buffer_manager_(wb),
J
Jim Paton 已提交
469
        table_factory_(new SkipListFactory) {
470 471
    options_.memtable_factory = table_factory_;
    ImmutableCFOptions ioptions(options_);
Y
Yi Wu 已提交
472 473
    memtable_ =
        new MemTable(internal_comparator_, ioptions, MutableCFOptions(options_),
474
                     wb, kMaxSequenceNumber, 0 /* column_family_id */);
475
    memtable_->Ref();
J
jorlow@chromium.org 已提交
476
  }
477 478 479 480 481 482
  ~MemTableConstructor() override { delete memtable_->Unref(); }
  Status FinishImpl(const Options&, const ImmutableCFOptions& ioptions,
                    const MutableCFOptions& /*moptions*/,
                    const BlockBasedTableOptions& /*table_options*/,
                    const InternalKeyComparator& /*internal_comparator*/,
                    const stl_wrappers::KVMap& kv_map) override {
483
    delete memtable_->Unref();
484
    ImmutableCFOptions mem_ioptions(ioptions);
485
    memtable_ = new MemTable(internal_comparator_, mem_ioptions,
Y
Yi Wu 已提交
486
                             MutableCFOptions(options_), write_buffer_manager_,
487
                             kMaxSequenceNumber, 0 /* column_family_id */);
488
    memtable_->Ref();
J
jorlow@chromium.org 已提交
489
    int seq = 1;
I
Igor Canadi 已提交
490 491
    for (const auto kv : kv_map) {
      memtable_->Add(seq, kTypeValue, kv.first, kv.second);
J
jorlow@chromium.org 已提交
492 493 494 495
      seq++;
    }
    return Status::OK();
  }
496
  InternalIterator* NewIterator(
497
      const SliceTransform* /*prefix_extractor*/) const override {
498 499
    return new KeyConvertingIterator(
        memtable_->NewIterator(ReadOptions(), &arena_), true);
J
jorlow@chromium.org 已提交
500 501
  }

502
  bool AnywayDeleteIterator() const override { return true; }
503

504
  bool IsArenaMode() const override { return true; }
505

J
jorlow@chromium.org 已提交
506
 private:
507
  mutable Arena arena_;
J
jorlow@chromium.org 已提交
508
  InternalKeyComparator internal_comparator_;
509
  Options options_;
510
  WriteBufferManager* write_buffer_manager_;
J
jorlow@chromium.org 已提交
511
  MemTable* memtable_;
J
Jim Paton 已提交
512
  std::shared_ptr<SkipListFactory> table_factory_;
J
jorlow@chromium.org 已提交
513 514
};

S
sdong 已提交
515 516 517
class InternalIteratorFromIterator : public InternalIterator {
 public:
  explicit InternalIteratorFromIterator(Iterator* it) : it_(it) {}
518 519 520 521 522 523 524
  bool Valid() const override { return it_->Valid(); }
  void Seek(const Slice& target) override { it_->Seek(target); }
  void SeekForPrev(const Slice& target) override { it_->SeekForPrev(target); }
  void SeekToFirst() override { it_->SeekToFirst(); }
  void SeekToLast() override { it_->SeekToLast(); }
  void Next() override { it_->Next(); }
  void Prev() override { it_->Prev(); }
S
sdong 已提交
525 526
  Slice key() const override { return it_->key(); }
  Slice value() const override { return it_->value(); }
527
  Status status() const override { return it_->status(); }
S
sdong 已提交
528 529

 private:
530
  std::unique_ptr<Iterator> it_;
S
sdong 已提交
531 532
};

J
jorlow@chromium.org 已提交
533 534 535 536 537
class DBConstructor: public Constructor {
 public:
  explicit DBConstructor(const Comparator* cmp)
      : Constructor(cmp),
        comparator_(cmp) {
A
Abhishek Kona 已提交
538
    db_ = nullptr;
J
jorlow@chromium.org 已提交
539 540
    NewDB();
  }
541 542 543 544 545 546 547
  ~DBConstructor() override { delete db_; }
  Status FinishImpl(const Options& /*options*/,
                    const ImmutableCFOptions& /*ioptions*/,
                    const MutableCFOptions& /*moptions*/,
                    const BlockBasedTableOptions& /*table_options*/,
                    const InternalKeyComparator& /*internal_comparator*/,
                    const stl_wrappers::KVMap& kv_map) override {
J
jorlow@chromium.org 已提交
548
    delete db_;
A
Abhishek Kona 已提交
549
    db_ = nullptr;
J
jorlow@chromium.org 已提交
550
    NewDB();
I
Igor Canadi 已提交
551
    for (const auto kv : kv_map) {
J
jorlow@chromium.org 已提交
552
      WriteBatch batch;
I
Igor Canadi 已提交
553
      batch.Put(kv.first, kv.second);
554
      EXPECT_TRUE(db_->Write(WriteOptions(), &batch).ok());
J
jorlow@chromium.org 已提交
555 556 557
    }
    return Status::OK();
  }
S
sdong 已提交
558

559
  InternalIterator* NewIterator(
560
      const SliceTransform* /*prefix_extractor*/) const override {
S
sdong 已提交
561
    return new InternalIteratorFromIterator(db_->NewIterator(ReadOptions()));
J
jorlow@chromium.org 已提交
562 563
  }

564
  DB* db() const override { return db_; }
J
jorlow@chromium.org 已提交
565

J
jorlow@chromium.org 已提交
566 567
 private:
  void NewDB() {
568
    std::string name = test::PerThreadDBPath("table_testdb");
J
jorlow@chromium.org 已提交
569

570
    Options options;
J
jorlow@chromium.org 已提交
571 572 573 574 575 576
    options.comparator = comparator_;
    Status status = DestroyDB(name, options);
    ASSERT_TRUE(status.ok()) << status.ToString();

    options.create_if_missing = true;
    options.error_if_exists = true;
J
jorlow@chromium.org 已提交
577
    options.write_buffer_size = 10000;  // Something small to force merging
J
jorlow@chromium.org 已提交
578 579 580 581 582 583 584 585 586
    status = DB::Open(options, name, &db_);
    ASSERT_TRUE(status.ok()) << status.ToString();
  }

  const Comparator* comparator_;
  DB* db_;
};

enum TestType {
587
  BLOCK_BASED_TABLE_TEST,
588
#ifndef ROCKSDB_LITE
589 590
  PLAIN_TABLE_SEMI_FIXED_PREFIX,
  PLAIN_TABLE_FULL_STR_PREFIX,
591
  PLAIN_TABLE_TOTAL_ORDER,
592
#endif  // !ROCKSDB_LITE
J
jorlow@chromium.org 已提交
593 594
  BLOCK_TEST,
  MEMTABLE_TEST,
595
  DB_TEST
J
jorlow@chromium.org 已提交
596 597 598 599 600 601
};

struct TestArgs {
  TestType type;
  bool reverse_compare;
  int restart_interval;
H
heyongqiang 已提交
602
  CompressionType compression;
603
  uint32_t compression_parallel_threads;
604
  uint32_t format_version;
605
  bool use_mmap;
J
jorlow@chromium.org 已提交
606 607
};

608 609 610 611 612 613 614 615 616 617 618
std::ostream& operator<<(std::ostream& os, const TestArgs& args) {
  os << "type: " << args.type << " reverse_compare: " << args.reverse_compare
     << " restart_interval: " << args.restart_interval
     << " compression: " << args.compression
     << " compression_parallel_threads: " << args.compression_parallel_threads
     << " format_version: " << args.format_version
     << " use_mmap: " << args.use_mmap;

  return os;
}

619
static std::vector<TestArgs> GenerateArgList() {
K
Kai Liu 已提交
620 621
  std::vector<TestArgs> test_args;
  std::vector<TestType> test_types = {
622 623 624 625 626 627 628 629
      BLOCK_BASED_TABLE_TEST,
#ifndef ROCKSDB_LITE
      PLAIN_TABLE_SEMI_FIXED_PREFIX,
      PLAIN_TABLE_FULL_STR_PREFIX,
      PLAIN_TABLE_TOTAL_ORDER,
#endif  // !ROCKSDB_LITE
      BLOCK_TEST,
      MEMTABLE_TEST, DB_TEST};
K
Kai Liu 已提交
630 631
  std::vector<bool> reverse_compare_types = {false, true};
  std::vector<int> restart_intervals = {16, 1, 1024};
632
  std::vector<uint32_t> compression_parallel_threads = {1, 4};
H
heyongqiang 已提交
633 634

  // Only add compression if it is supported
635 636
  std::vector<std::pair<CompressionType, bool>> compression_types;
  compression_types.emplace_back(kNoCompression, false);
I
Igor Canadi 已提交
637
  if (Snappy_Supported()) {
638
    compression_types.emplace_back(kSnappyCompression, false);
K
Kai Liu 已提交
639
  }
I
Igor Canadi 已提交
640
  if (Zlib_Supported()) {
641 642
    compression_types.emplace_back(kZlibCompression, false);
    compression_types.emplace_back(kZlibCompression, true);
K
Kai Liu 已提交
643
  }
I
Igor Canadi 已提交
644
  if (BZip2_Supported()) {
645 646
    compression_types.emplace_back(kBZip2Compression, false);
    compression_types.emplace_back(kBZip2Compression, true);
K
Kai Liu 已提交
647
  }
I
Igor Canadi 已提交
648
  if (LZ4_Supported()) {
649 650 651 652
    compression_types.emplace_back(kLZ4Compression, false);
    compression_types.emplace_back(kLZ4Compression, true);
    compression_types.emplace_back(kLZ4HCCompression, false);
    compression_types.emplace_back(kLZ4HCCompression, true);
A
Albert Strasheim 已提交
653
  }
654 655 656 657
  if (XPRESS_Supported()) {
    compression_types.emplace_back(kXpressCompression, false);
    compression_types.emplace_back(kXpressCompression, true);
  }
658
  if (ZSTD_Supported()) {
S
sdong 已提交
659 660
    compression_types.emplace_back(kZSTD, false);
    compression_types.emplace_back(kZSTD, true);
661
  }
H
heyongqiang 已提交
662

K
Kai Liu 已提交
663 664
  for (auto test_type : test_types) {
    for (auto reverse_compare : reverse_compare_types) {
665
#ifndef ROCKSDB_LITE
K
Kai Liu 已提交
666
      if (test_type == PLAIN_TABLE_SEMI_FIXED_PREFIX ||
667 668
          test_type == PLAIN_TABLE_FULL_STR_PREFIX ||
          test_type == PLAIN_TABLE_TOTAL_ORDER) {
669 670
        // Plain table doesn't use restart index or compression.
        TestArgs one_arg;
K
Kai Liu 已提交
671 672 673
        one_arg.type = test_type;
        one_arg.reverse_compare = reverse_compare;
        one_arg.restart_interval = restart_intervals[0];
674
        one_arg.compression = compression_types[0].first;
675
        one_arg.compression_parallel_threads = 1;
676
        one_arg.format_version = 0;
677 678 679
        one_arg.use_mmap = true;
        test_args.push_back(one_arg);
        one_arg.use_mmap = false;
K
Kai Liu 已提交
680
        test_args.push_back(one_arg);
681 682
        continue;
      }
683
#endif  // !ROCKSDB_LITE
H
heyongqiang 已提交
684

K
Kai Liu 已提交
685 686
      for (auto restart_interval : restart_intervals) {
        for (auto compression_type : compression_types) {
687 688 689 690 691 692 693
          for (auto num_threads : compression_parallel_threads) {
            TestArgs one_arg;
            one_arg.type = test_type;
            one_arg.reverse_compare = reverse_compare;
            one_arg.restart_interval = restart_interval;
            one_arg.compression = compression_type.first;
            one_arg.compression_parallel_threads = num_threads;
694
            one_arg.format_version = compression_type.second ? 2 : 1;
695 696 697
            one_arg.use_mmap = false;
            test_args.push_back(one_arg);
          }
698
        }
K
Kai Liu 已提交
699
      }
700
    }
K
Kai Liu 已提交
701 702
  }
  return test_args;
H
heyongqiang 已提交
703
}
J
jorlow@chromium.org 已提交
704

705 706 707 708 709 710 711 712 713 714 715 716 717
// In order to make all tests run for plain table format, including
// those operating on empty keys, create a new prefix transformer which
// return fixed prefix if the slice is not shorter than the prefix length,
// and the full slice if it is shorter.
class FixedOrLessPrefixTransform : public SliceTransform {
 private:
  const size_t prefix_len_;

 public:
  explicit FixedOrLessPrefixTransform(size_t prefix_len) :
      prefix_len_(prefix_len) {
  }

718
  const char* Name() const override { return "rocksdb.FixedPrefix"; }
719

720
  Slice Transform(const Slice& src) const override {
721 722 723 724 725 726 727
    assert(InDomain(src));
    if (src.size() < prefix_len_) {
      return src;
    }
    return Slice(src.data(), prefix_len_);
  }

728
  bool InDomain(const Slice& /*src*/) const override { return true; }
729

730
  bool InRange(const Slice& dst) const override {
731 732
    return (dst.size() <= prefix_len_);
  }
733
  bool FullLengthEnabled(size_t* /*len*/) const override { return false; }
734 735
};

I
Igor Sugak 已提交
736
class HarnessTest : public testing::Test {
J
jorlow@chromium.org 已提交
737
 public:
738 739 740
  explicit HarnessTest(const TestArgs& args)
      : args_(args),
        ioptions_(options_),
741
        moptions_(options_),
742 743 744 745
        write_buffer_(options_.db_write_buffer_size),
        support_prev_(true),
        only_support_prefix_seek_(false) {
    options_.compression = args_.compression;
746
    options_.compression_opts.parallel_threads =
747
        args_.compression_parallel_threads;
J
jorlow@chromium.org 已提交
748 749
    // Use shorter block size for tests to exercise block boundary
    // conditions more.
750
    if (args_.reverse_compare) {
J
jorlow@chromium.org 已提交
751 752
      options_.comparator = &reverse_key_comparator;
    }
753 754 755 756

    internal_comparator_.reset(
        new test::PlainInternalKeyComparator(options_.comparator));

757 758
    options_.allow_mmap_reads = args_.use_mmap;
    switch (args_.type) {
759
      case BLOCK_BASED_TABLE_TEST:
760
        table_options_.flush_block_policy_factory.reset(
761
            new FlushBlockBySizePolicyFactory());
762
        table_options_.block_size = 256;
763 764 765
        table_options_.block_restart_interval = args_.restart_interval;
        table_options_.index_block_restart_interval = args_.restart_interval;
        table_options_.format_version = args_.format_version;
766 767
        options_.table_factory.reset(
            new BlockBasedTableFactory(table_options_));
768 769
        constructor_.reset(new TableConstructor(
            options_.comparator, true /* convert_to_internal_key_ */));
770 771
        internal_comparator_.reset(
            new InternalKeyComparator(options_.comparator));
772
        break;
773 774
// Plain table is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
775 776 777
      case PLAIN_TABLE_SEMI_FIXED_PREFIX:
        support_prev_ = false;
        only_support_prefix_seek_ = true;
778
        options_.prefix_extractor.reset(new FixedOrLessPrefixTransform(2));
779
        options_.table_factory.reset(NewPlainTableFactory());
780 781
        constructor_.reset(new TableConstructor(
            options_.comparator, true /* convert_to_internal_key_ */));
782 783
        internal_comparator_.reset(
            new InternalKeyComparator(options_.comparator));
784 785 786 787
        break;
      case PLAIN_TABLE_FULL_STR_PREFIX:
        support_prev_ = false;
        only_support_prefix_seek_ = true;
788
        options_.prefix_extractor.reset(NewNoopTransform());
789
        options_.table_factory.reset(NewPlainTableFactory());
790 791
        constructor_.reset(new TableConstructor(
            options_.comparator, true /* convert_to_internal_key_ */));
792 793 794 795 796 797 798
        internal_comparator_.reset(
            new InternalKeyComparator(options_.comparator));
        break;
      case PLAIN_TABLE_TOTAL_ORDER:
        support_prev_ = false;
        only_support_prefix_seek_ = false;
        options_.prefix_extractor = nullptr;
S
Stanislau Hlebik 已提交
799 800 801 802 803 804 805 806 807 808

        {
          PlainTableOptions plain_table_options;
          plain_table_options.user_key_len = kPlainTableVariableLength;
          plain_table_options.bloom_bits_per_key = 0;
          plain_table_options.hash_table_ratio = 0;

          options_.table_factory.reset(
              NewPlainTableFactory(plain_table_options));
        }
809 810
        constructor_.reset(new TableConstructor(
            options_.comparator, true /* convert_to_internal_key_ */));
811 812
        internal_comparator_.reset(
            new InternalKeyComparator(options_.comparator));
J
jorlow@chromium.org 已提交
813
        break;
814
#endif  // !ROCKSDB_LITE
J
jorlow@chromium.org 已提交
815
      case BLOCK_TEST:
816 817 818
        table_options_.block_size = 256;
        options_.table_factory.reset(
            new BlockBasedTableFactory(table_options_));
819
        constructor_.reset(new BlockConstructor(options_.comparator));
J
jorlow@chromium.org 已提交
820 821
        break;
      case MEMTABLE_TEST:
822 823 824
        table_options_.block_size = 256;
        options_.table_factory.reset(
            new BlockBasedTableFactory(table_options_));
825 826
        constructor_.reset(
            new MemTableConstructor(options_.comparator, &write_buffer_));
J
jorlow@chromium.org 已提交
827 828
        break;
      case DB_TEST:
829 830 831
        table_options_.block_size = 256;
        options_.table_factory.reset(
            new BlockBasedTableFactory(table_options_));
832
        constructor_.reset(new DBConstructor(options_.comparator));
J
jorlow@chromium.org 已提交
833 834
        break;
    }
L
Lei Jin 已提交
835
    ioptions_ = ImmutableCFOptions(options_);
836
    moptions_ = MutableCFOptions(options_);
J
jorlow@chromium.org 已提交
837 838 839 840 841 842 843 844
  }

  void Add(const std::string& key, const std::string& value) {
    constructor_->Add(key, value);
  }

  void Test(Random* rnd) {
    std::vector<std::string> keys;
845
    stl_wrappers::KVMap data;
846
    constructor_->Finish(options_, ioptions_, moptions_, table_options_,
L
Lei Jin 已提交
847
                         *internal_comparator_, &keys, &data);
J
jorlow@chromium.org 已提交
848 849

    TestForwardScan(keys, data);
850 851 852
    if (support_prev_) {
      TestBackwardScan(keys, data);
    }
J
jorlow@chromium.org 已提交
853 854 855
    TestRandomAccess(rnd, keys, data);
  }

A
Andrew Kryczka 已提交
856
  void TestForwardScan(const std::vector<std::string>& /*keys*/,
857
                       const stl_wrappers::KVMap& data) {
S
sdong 已提交
858
    InternalIterator* iter = constructor_->NewIterator();
J
jorlow@chromium.org 已提交
859 860
    ASSERT_TRUE(!iter->Valid());
    iter->SeekToFirst();
861 862
    for (stl_wrappers::KVMap::const_iterator model_iter = data.begin();
         model_iter != data.end(); ++model_iter) {
J
jorlow@chromium.org 已提交
863 864 865 866
      ASSERT_EQ(ToString(data, model_iter), ToString(iter));
      iter->Next();
    }
    ASSERT_TRUE(!iter->Valid());
867
    if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
S
sdong 已提交
868
      iter->~InternalIterator();
869 870 871
    } else {
      delete iter;
    }
J
jorlow@chromium.org 已提交
872 873
  }

A
Andrew Kryczka 已提交
874
  void TestBackwardScan(const std::vector<std::string>& /*keys*/,
875
                        const stl_wrappers::KVMap& data) {
S
sdong 已提交
876
    InternalIterator* iter = constructor_->NewIterator();
J
jorlow@chromium.org 已提交
877 878
    ASSERT_TRUE(!iter->Valid());
    iter->SeekToLast();
879 880
    for (stl_wrappers::KVMap::const_reverse_iterator model_iter = data.rbegin();
         model_iter != data.rend(); ++model_iter) {
J
jorlow@chromium.org 已提交
881 882 883 884
      ASSERT_EQ(ToString(data, model_iter), ToString(iter));
      iter->Prev();
    }
    ASSERT_TRUE(!iter->Valid());
885
    if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
S
sdong 已提交
886
      iter->~InternalIterator();
887 888 889
    } else {
      delete iter;
    }
J
jorlow@chromium.org 已提交
890 891
  }

892 893
  void TestRandomAccess(Random* rnd, const std::vector<std::string>& keys,
                        const stl_wrappers::KVMap& data) {
J
jorlow@chromium.org 已提交
894
    static const bool kVerbose = false;
S
sdong 已提交
895
    InternalIterator* iter = constructor_->NewIterator();
J
jorlow@chromium.org 已提交
896
    ASSERT_TRUE(!iter->Valid());
897
    stl_wrappers::KVMap::const_iterator model_iter = data.begin();
J
jorlow@chromium.org 已提交
898 899
    if (kVerbose) fprintf(stderr, "---\n");
    for (int i = 0; i < 200; i++) {
900
      const int toss = rnd->Uniform(support_prev_ ? 5 : 3);
J
jorlow@chromium.org 已提交
901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957
      switch (toss) {
        case 0: {
          if (iter->Valid()) {
            if (kVerbose) fprintf(stderr, "Next\n");
            iter->Next();
            ++model_iter;
            ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          }
          break;
        }

        case 1: {
          if (kVerbose) fprintf(stderr, "SeekToFirst\n");
          iter->SeekToFirst();
          model_iter = data.begin();
          ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          break;
        }

        case 2: {
          std::string key = PickRandomKey(rnd, keys);
          model_iter = data.lower_bound(key);
          if (kVerbose) fprintf(stderr, "Seek '%s'\n",
                                EscapeString(key).c_str());
          iter->Seek(Slice(key));
          ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          break;
        }

        case 3: {
          if (iter->Valid()) {
            if (kVerbose) fprintf(stderr, "Prev\n");
            iter->Prev();
            if (model_iter == data.begin()) {
              model_iter = data.end();   // Wrap around to invalid value
            } else {
              --model_iter;
            }
            ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          }
          break;
        }

        case 4: {
          if (kVerbose) fprintf(stderr, "SeekToLast\n");
          iter->SeekToLast();
          if (keys.empty()) {
            model_iter = data.end();
          } else {
            std::string last = data.rbegin()->first;
            model_iter = data.lower_bound(last);
          }
          ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          break;
        }
      }
    }
958
    if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
S
sdong 已提交
959
      iter->~InternalIterator();
960 961 962
    } else {
      delete iter;
    }
J
jorlow@chromium.org 已提交
963 964
  }

965 966
  std::string ToString(const stl_wrappers::KVMap& data,
                       const stl_wrappers::KVMap::const_iterator& it) {
J
jorlow@chromium.org 已提交
967 968 969 970 971 972 973
    if (it == data.end()) {
      return "END";
    } else {
      return "'" + it->first + "->" + it->second + "'";
    }
  }

974 975
  std::string ToString(const stl_wrappers::KVMap& data,
                       const stl_wrappers::KVMap::const_reverse_iterator& it) {
J
jorlow@chromium.org 已提交
976 977 978 979 980 981 982
    if (it == data.rend()) {
      return "END";
    } else {
      return "'" + it->first + "->" + it->second + "'";
    }
  }

S
sdong 已提交
983
  std::string ToString(const InternalIterator* it) {
J
jorlow@chromium.org 已提交
984 985 986 987 988 989 990 991 992 993 994
    if (!it->Valid()) {
      return "END";
    } else {
      return "'" + it->key().ToString() + "->" + it->value().ToString() + "'";
    }
  }

  std::string PickRandomKey(Random* rnd, const std::vector<std::string>& keys) {
    if (keys.empty()) {
      return "foo";
    } else {
995
      const int index = rnd->Uniform(static_cast<int>(keys.size()));
J
jorlow@chromium.org 已提交
996
      std::string result = keys[index];
997
      switch (rnd->Uniform(support_prev_ ? 3 : 1)) {
J
jorlow@chromium.org 已提交
998 999 1000 1001 1002
        case 0:
          // Return an existing key
          break;
        case 1: {
          // Attempt to return something smaller than an existing key
1003 1004 1005 1006 1007
          if (result.size() > 0 && result[result.size() - 1] > '\0'
              && (!only_support_prefix_seek_
                  || options_.prefix_extractor->Transform(result).size()
                  < result.size())) {
            result[result.size() - 1]--;
J
jorlow@chromium.org 已提交
1008 1009
          }
          break;
1010
      }
J
jorlow@chromium.org 已提交
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020
        case 2: {
          // Return something larger than an existing key
          Increment(options_.comparator, &result);
          break;
        }
      }
      return result;
    }
  }

A
Abhishek Kona 已提交
1021
  // Returns nullptr if not running against a DB
J
jorlow@chromium.org 已提交
1022 1023
  DB* db() const { return constructor_->db(); }

J
jorlow@chromium.org 已提交
1024
 private:
1025 1026
  TestArgs args_;
  Options options_;
L
Lei Jin 已提交
1027
  ImmutableCFOptions ioptions_;
1028
  MutableCFOptions moptions_;
1029 1030
  BlockBasedTableOptions table_options_;
  std::unique_ptr<Constructor> constructor_;
1031
  WriteBufferManager write_buffer_;
1032 1033
  bool support_prev_;
  bool only_support_prefix_seek_;
1034
  std::shared_ptr<InternalKeyComparator> internal_comparator_;
J
jorlow@chromium.org 已提交
1035 1036
};

1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054
class ParameterizedHarnessTest : public HarnessTest,
                                 public testing::WithParamInterface<TestArgs> {
 public:
  ParameterizedHarnessTest() : HarnessTest(GetParam()) {}
};

INSTANTIATE_TEST_CASE_P(TableTest, ParameterizedHarnessTest,
                        ::testing::ValuesIn(GenerateArgList()));

class DBHarnessTest : public HarnessTest {
 public:
  DBHarnessTest()
      : HarnessTest(TestArgs{DB_TEST, /* reverse_compare */ false,
                             /* restart_interval */ 16, kNoCompression,
                             /* compression_parallel_threads */ 1,
                             /* format_version */ 0, /* use_mmap */ false}) {}
};

J
jorlow@chromium.org 已提交
1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  bool result = (val >= low) && (val <= high);
  if (!result) {
    fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
            (unsigned long long)(val),
            (unsigned long long)(low),
            (unsigned long long)(high));
  }
  return result;
}

K
Kai Liu 已提交
1066
// Tests against all kinds of tables
I
Igor Sugak 已提交
1067
class TableTest : public testing::Test {
1068 1069 1070 1071 1072 1073 1074 1075 1076
 public:
  const InternalKeyComparator& GetPlainInternalComparator(
      const Comparator* comp) {
    if (!plain_internal_comparator) {
      plain_internal_comparator.reset(
          new test::PlainInternalKeyComparator(comp));
    }
    return *plain_internal_comparator;
  }
M
Maysam Yabandeh 已提交
1077
  void IndexTest(BlockBasedTableOptions table_options);
1078 1079 1080 1081 1082 1083

 private:
  std::unique_ptr<InternalKeyComparator> plain_internal_comparator;
};

class GeneralTableTest : public TableTest {};
1084 1085 1086 1087
class BlockBasedTableTest
    : public TableTest,
      virtual public ::testing::WithParamInterface<uint32_t> {
 public:
1088
  BlockBasedTableTest() : format_(GetParam()) {
1089
    env_ = ROCKSDB_NAMESPACE::Env::Default();
1090
  }
1091 1092 1093 1094 1095 1096 1097

  BlockBasedTableOptions GetBlockBasedTableOptions() {
    BlockBasedTableOptions options;
    options.format_version = format_;
    return options;
  }

1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125
  void SetupTracingTest(TableConstructor* c) {
    test_path_ = test::PerThreadDBPath("block_based_table_tracing_test");
    EXPECT_OK(env_->CreateDir(test_path_));
    trace_file_path_ = test_path_ + "/block_cache_trace_file";
    TraceOptions trace_opt;
    std::unique_ptr<TraceWriter> trace_writer;
    EXPECT_OK(NewFileTraceWriter(env_, EnvOptions(), trace_file_path_,
                                 &trace_writer));
    c->block_cache_tracer_.StartTrace(env_, trace_opt, std::move(trace_writer));
    {
      std::string user_key = "k01";
      InternalKey internal_key(user_key, 0, kTypeValue);
      std::string encoded_key = internal_key.Encode().ToString();
      c->Add(encoded_key, kDummyValue);
    }
    {
      std::string user_key = "k02";
      InternalKey internal_key(user_key, 0, kTypeValue);
      std::string encoded_key = internal_key.Encode().ToString();
      c->Add(encoded_key, kDummyValue);
    }
  }

  void VerifyBlockAccessTrace(
      TableConstructor* c,
      const std::vector<BlockCacheTraceRecord>& expected_records) {
    c->block_cache_tracer_.EndTrace();

B
Burton Li 已提交
1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139
    {
      std::unique_ptr<TraceReader> trace_reader;
      Status s =
          NewFileTraceReader(env_, EnvOptions(), trace_file_path_, &trace_reader);
      EXPECT_OK(s);
      BlockCacheTraceReader reader(std::move(trace_reader));
      BlockCacheTraceHeader header;
      EXPECT_OK(reader.ReadHeader(&header));
      uint32_t index = 0;
      while (s.ok()) {
        BlockCacheTraceRecord access;
        s = reader.ReadAccess(&access);
        if (!s.ok()) {
          break;
1140
        }
B
Burton Li 已提交
1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169
        ASSERT_LT(index, expected_records.size());
        EXPECT_NE("", access.block_key);
        EXPECT_EQ(access.block_type, expected_records[index].block_type);
        EXPECT_GT(access.block_size, 0);
        EXPECT_EQ(access.caller, expected_records[index].caller);
        EXPECT_EQ(access.no_insert, expected_records[index].no_insert);
        EXPECT_EQ(access.is_cache_hit, expected_records[index].is_cache_hit);
        // Get
        if (access.caller == TableReaderCaller::kUserGet) {
          EXPECT_EQ(access.referenced_key,
                    expected_records[index].referenced_key);
          EXPECT_EQ(access.get_id, expected_records[index].get_id);
          EXPECT_EQ(access.get_from_user_specified_snapshot,
                    expected_records[index].get_from_user_specified_snapshot);
          if (access.block_type == TraceType::kBlockTraceDataBlock) {
            EXPECT_GT(access.referenced_data_size, 0);
            EXPECT_GT(access.num_keys_in_block, 0);
            EXPECT_EQ(access.referenced_key_exist_in_block,
                      expected_records[index].referenced_key_exist_in_block);
          }
        } else {
          EXPECT_EQ(access.referenced_key, "");
          EXPECT_EQ(access.get_id, 0);
          EXPECT_TRUE(access.get_from_user_specified_snapshot == Boolean::kFalse);
          EXPECT_EQ(access.referenced_data_size, 0);
          EXPECT_EQ(access.num_keys_in_block, 0);
          EXPECT_TRUE(access.referenced_key_exist_in_block == Boolean::kFalse);
        }
        index++;
1170
      }
B
Burton Li 已提交
1171
      EXPECT_EQ(index, expected_records.size());
1172 1173 1174 1175 1176
    }
    EXPECT_OK(env_->DeleteFile(trace_file_path_));
    EXPECT_OK(env_->DeleteDir(test_path_));
  }

1177 1178
 protected:
  uint64_t IndexUncompressedHelper(bool indexCompress);
1179 1180 1181

 private:
  uint32_t format_;
1182 1183 1184
  Env* env_;
  std::string trace_file_path_;
  std::string test_path_;
1185
};
1186
class PlainTableTest : public TableTest {};
I
Igor Sugak 已提交
1187
class TablePropertyTest : public testing::Test {};
1188
class BBTTailPrefetchTest : public TableTest {};
1189

1190 1191 1192 1193 1194 1195 1196 1197 1198
// The helper class to test the file checksum
class FileChecksumTestHelper {
 public:
  FileChecksumTestHelper(bool convert_to_internal_key = false)
      : convert_to_internal_key_(convert_to_internal_key) {
  }
  ~FileChecksumTestHelper() {}

  void CreateWriteableFile() {
1199
    sink_ = new test::StringSink();
1200 1201 1202
    file_writer_.reset(test::GetWritableFileWriter(sink_, "" /* don't care */));
  }

1203
  void SetFileChecksumGenerator(FileChecksumGenerator* checksum_generator) {
1204
    if (file_writer_ != nullptr) {
1205
      file_writer_->TEST_SetFileChecksumGenerator(checksum_generator);
1206 1207
    } else {
      delete checksum_generator;
1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247
    }
  }

  WritableFileWriter* GetFileWriter() { return file_writer_.get(); }

  Status ResetTableBuilder(std::unique_ptr<TableBuilder>&& builder) {
    assert(builder != nullptr);
    table_builder_ = std::move(builder);
    return Status::OK();
  }

  void AddKVtoKVMap(int num_entries) {
    Random rnd(test::RandomSeed());
    for (int i = 0; i < num_entries; i++) {
      std::string v;
      test::RandomString(&rnd, 100, &v);
      kv_map_[test::RandomKey(&rnd, 20)] = v;
    }
  }

  Status WriteKVAndFlushTable() {
    for (const auto kv : kv_map_) {
      if (convert_to_internal_key_) {
        ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
        std::string encoded;
        AppendInternalKey(&encoded, ikey);
        table_builder_->Add(encoded, kv.second);
      } else {
        table_builder_->Add(kv.first, kv.second);
      }
      EXPECT_TRUE(table_builder_->status().ok());
    }
    Status s = table_builder_->Finish();
    file_writer_->Flush();
    EXPECT_TRUE(s.ok());

    EXPECT_EQ(sink_->contents().size(), table_builder_->FileSize());
    return s;
  }

1248 1249 1250 1251
  std::string GetFileChecksum() {
    file_writer_->Close();
    return table_builder_->GetFileChecksum();
  }
1252 1253 1254 1255 1256

  const char* GetFileChecksumFuncName() {
    return table_builder_->GetFileChecksumFuncName();
  }

1257
  Status CalculateFileChecksum(FileChecksumGenerator* file_checksum_generator,
1258
                               std::string* checksum) {
1259
    assert(file_checksum_generator != nullptr);
1260 1261
    cur_uniq_id_ = checksum_uniq_id_++;
    test::StringSink* ss_rw =
1262 1263
        ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter(
            file_writer_.get());
1264 1265 1266 1267 1268 1269
    file_reader_.reset(test::GetRandomAccessFileReader(
        new test::StringSource(ss_rw->contents())));
    std::unique_ptr<char[]> scratch(new char[2048]);
    Slice result;
    uint64_t offset = 0;
    Status s;
1270 1271
    s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(),
                           nullptr, false);
1272 1273 1274 1275
    if (!s.ok()) {
      return s;
    }
    while (result.size() != 0) {
1276
      file_checksum_generator->Update(scratch.get(), result.size());
1277
      offset += static_cast<uint64_t>(result.size());
1278 1279
      s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(),
                             nullptr, false);
1280 1281 1282 1283 1284
      if (!s.ok()) {
        return s;
      }
    }
    EXPECT_EQ(offset, static_cast<uint64_t>(table_builder_->FileSize()));
1285 1286
    file_checksum_generator->Finalize();
    *checksum = file_checksum_generator->GetChecksum();
1287 1288 1289 1290 1291 1292 1293 1294 1295 1296
    return Status::OK();
  }

 private:
  bool convert_to_internal_key_;
  uint64_t cur_uniq_id_;
  std::unique_ptr<WritableFileWriter> file_writer_;
  std::unique_ptr<RandomAccessFileReader> file_reader_;
  std::unique_ptr<TableBuilder> table_builder_;
  stl_wrappers::KVMap kv_map_;
1297
  test::StringSink* sink_ = nullptr;
1298 1299 1300 1301 1302 1303

  static uint64_t checksum_uniq_id_;
};

uint64_t FileChecksumTestHelper::checksum_uniq_id_ = 1;

1304 1305 1306 1307
INSTANTIATE_TEST_CASE_P(FormatDef, BlockBasedTableTest,
                        testing::Values(test::kDefaultFormatVersion));
INSTANTIATE_TEST_CASE_P(FormatLatest, BlockBasedTableTest,
                        testing::Values(test::kLatestFormatVersion));
1308

1309 1310
// This test serves as the living tutorial for the prefix scan of user collected
// properties.
I
Igor Sugak 已提交
1311
TEST_F(TablePropertyTest, PrefixScanTest) {
1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329
  UserCollectedProperties props{{"num.111.1", "1"},
                                {"num.111.2", "2"},
                                {"num.111.3", "3"},
                                {"num.333.1", "1"},
                                {"num.333.2", "2"},
                                {"num.333.3", "3"},
                                {"num.555.1", "1"},
                                {"num.555.2", "2"},
                                {"num.555.3", "3"}, };

  // prefixes that exist
  for (const std::string& prefix : {"num.111", "num.333", "num.555"}) {
    int num = 0;
    for (auto pos = props.lower_bound(prefix);
         pos != props.end() &&
             pos->first.compare(0, prefix.size(), prefix) == 0;
         ++pos) {
      ++num;
1330
      auto key = prefix + "." + ToString(num);
1331
      ASSERT_EQ(key, pos->first);
1332
      ASSERT_EQ(ToString(num), pos->second);
1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344
    }
    ASSERT_EQ(3, num);
  }

  // prefixes that don't exist
  for (const std::string& prefix :
       {"num.000", "num.222", "num.444", "num.666"}) {
    auto pos = props.lower_bound(prefix);
    ASSERT_TRUE(pos == props.end() ||
                pos->first.compare(0, prefix.size(), prefix) != 0);
  }
}
J
jorlow@chromium.org 已提交
1345

K
Kai Liu 已提交
1346 1347
// This test include all the basic checks except those for index size and block
// size, which will be conducted in separated unit tests.
1348
TEST_P(BlockBasedTableTest, BasicBlockBasedTableProperties) {
1349
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
K
Kai Liu 已提交
1350 1351 1352 1353 1354 1355 1356 1357 1358 1359

  c.Add("a1", "val1");
  c.Add("b2", "val2");
  c.Add("c3", "val3");
  c.Add("d4", "val4");
  c.Add("e5", "val5");
  c.Add("f6", "val6");
  c.Add("g7", "val7");
  c.Add("h8", "val8");
  c.Add("j9", "val9");
1360
  uint64_t diff_internal_user_bytes = 9 * 8;  // 8 is seq size, 9 k-v totally
K
Kai Liu 已提交
1361 1362

  std::vector<std::string> keys;
1363
  stl_wrappers::KVMap kvmap;
1364
  Options options;
K
Kai Liu 已提交
1365
  options.compression = kNoCompression;
1366
  options.statistics = CreateDBStatistics();
1367
  options.statistics->set_stats_level(StatsLevel::kAll);
1368
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1369 1370
  table_options.block_restart_interval = 1;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
K
Kai Liu 已提交
1371

1372
  ImmutableCFOptions ioptions(options);
1373
  MutableCFOptions moptions(options);
1374
  ioptions.statistics = options.statistics.get();
1375
  c.Finish(options, ioptions, moptions, table_options,
1376
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1377
  ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_NOT_COMPRESSED), 0);
K
Kai Liu 已提交
1378

1379
  auto& props = *c.GetTableReader()->GetTableProperties();
K
kailiu 已提交
1380
  ASSERT_EQ(kvmap.size(), props.num_entries);
K
Kai Liu 已提交
1381 1382 1383 1384

  auto raw_key_size = kvmap.size() * 2ul;
  auto raw_value_size = kvmap.size() * 4ul;

1385
  ASSERT_EQ(raw_key_size + diff_internal_user_bytes, props.raw_key_size);
K
kailiu 已提交
1386 1387 1388
  ASSERT_EQ(raw_value_size, props.raw_value_size);
  ASSERT_EQ(1ul, props.num_data_blocks);
  ASSERT_EQ("", props.filter_policy_name);  // no filter policy is used
K
Kai Liu 已提交
1389 1390

  // Verify data size.
I
Igor Canadi 已提交
1391
  BlockBuilder block_builder(1);
K
Kai Liu 已提交
1392 1393 1394 1395
  for (const auto& item : kvmap) {
    block_builder.Add(item.first, item.second);
  }
  Slice content = block_builder.Finish();
1396 1397
  ASSERT_EQ(content.size() + kBlockTrailerSize + diff_internal_user_bytes,
            props.data_size);
1398
  c.ResetTableReader();
K
Kai Liu 已提交
1399 1400
}

Y
Yi Wu 已提交
1401
#ifdef SNAPPY
1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414
uint64_t BlockBasedTableTest::IndexUncompressedHelper(bool compressed) {
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  constexpr size_t kNumKeys = 10000;

  for (size_t k = 0; k < kNumKeys; ++k) {
    c.Add("key" + ToString(k), "val" + ToString(k));
  }

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  options.compression = kSnappyCompression;
  options.statistics = CreateDBStatistics();
1415
  options.statistics->set_stats_level(StatsLevel::kAll);
1416
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1417 1418 1419 1420 1421
  table_options.block_restart_interval = 1;
  table_options.enable_index_compression = compressed;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  ImmutableCFOptions ioptions(options);
1422
  MutableCFOptions moptions(options);
1423
  ioptions.statistics = options.statistics.get();
1424
  c.Finish(options, ioptions, moptions, table_options,
1425 1426 1427 1428
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
  c.ResetTableReader();
  return options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED);
}
1429
TEST_P(BlockBasedTableTest, IndexUncompressed) {
1430 1431 1432 1433 1434
  uint64_t tbl1_compressed_cnt = IndexUncompressedHelper(true);
  uint64_t tbl2_compressed_cnt = IndexUncompressedHelper(false);
  // tbl1_compressed_cnt should include 1 index block
  EXPECT_EQ(tbl2_compressed_cnt + 1, tbl1_compressed_cnt);
}
Y
Yi Wu 已提交
1435
#endif  // SNAPPY
1436

1437
TEST_P(BlockBasedTableTest, BlockBasedTableProperties2) {
1438 1439 1440 1441 1442 1443
  TableConstructor c(&reverse_key_comparator);
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;

  {
    Options options;
1444
    options.compression = CompressionType::kNoCompression;
1445
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1446 1447 1448
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));

    const ImmutableCFOptions ioptions(options);
1449 1450
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
1451 1452 1453 1454 1455 1456 1457 1458
             GetPlainInternalComparator(options.comparator), &keys, &kvmap);

    auto& props = *c.GetTableReader()->GetTableProperties();

    // Default comparator
    ASSERT_EQ("leveldb.BytewiseComparator", props.comparator_name);
    // No merge operator
    ASSERT_EQ("nullptr", props.merge_operator_name);
A
Aaron Gao 已提交
1459 1460
    // No prefix extractor
    ASSERT_EQ("nullptr", props.prefix_extractor_name);
1461 1462 1463 1464
    // No property collectors
    ASSERT_EQ("[]", props.property_collectors_names);
    // No filter policy is used
    ASSERT_EQ("", props.filter_policy_name);
1465 1466
    // Compression type == that set:
    ASSERT_EQ("NoCompression", props.compression_name);
1467 1468 1469 1470 1471
    c.ResetTableReader();
  }

  {
    Options options;
1472
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1473 1474 1475
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    options.comparator = &reverse_key_comparator;
    options.merge_operator = MergeOperators::CreateUInt64AddOperator();
A
Aaron Gao 已提交
1476
    options.prefix_extractor.reset(NewNoopTransform());
1477 1478 1479 1480 1481 1482
    options.table_properties_collector_factories.emplace_back(
        new DummyPropertiesCollectorFactory1());
    options.table_properties_collector_factories.emplace_back(
        new DummyPropertiesCollectorFactory2());

    const ImmutableCFOptions ioptions(options);
1483 1484
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
1485 1486 1487 1488 1489 1490
             GetPlainInternalComparator(options.comparator), &keys, &kvmap);

    auto& props = *c.GetTableReader()->GetTableProperties();

    ASSERT_EQ("rocksdb.ReverseBytewiseComparator", props.comparator_name);
    ASSERT_EQ("UInt64AddOperator", props.merge_operator_name);
A
Aaron Gao 已提交
1491
    ASSERT_EQ("rocksdb.Noop", props.prefix_extractor_name);
1492 1493 1494 1495 1496 1497 1498
    ASSERT_EQ("[DummyPropertiesCollector1,DummyPropertiesCollector2]",
              props.property_collectors_names);
    ASSERT_EQ("", props.filter_policy_name);  // no filter policy is used
    c.ResetTableReader();
  }
}

1499
TEST_P(BlockBasedTableTest, RangeDelBlock) {
1500 1501 1502 1503
  TableConstructor c(BytewiseComparator());
  std::vector<std::string> keys = {"1pika", "2chu"};
  std::vector<std::string> vals = {"p", "c"};

1504 1505 1506 1507 1508 1509 1510
  std::vector<RangeTombstone> expected_tombstones = {
      {"1pika", "2chu", 0},
      {"2chu", "c", 1},
      {"2chu", "c", 0},
      {"c", "p", 0},
  };

1511 1512 1513 1514 1515 1516 1517 1518 1519 1520
  for (int i = 0; i < 2; i++) {
    RangeTombstone t(keys[i], vals[i], i);
    std::pair<InternalKey, Slice> p = t.Serialize();
    c.Add(p.first.Encode().ToString(), p.second);
  }

  std::vector<std::string> sorted_keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  options.compression = kNoCompression;
1521
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1522 1523 1524 1525
  table_options.block_restart_interval = 1;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  const ImmutableCFOptions ioptions(options);
1526
  const MutableCFOptions moptions(options);
1527 1528
  std::unique_ptr<InternalKeyComparator> internal_cmp(
      new InternalKeyComparator(options.comparator));
1529 1530
  c.Finish(options, ioptions, moptions, table_options, *internal_cmp,
           &sorted_keys, &kvmap);
1531

1532 1533 1534 1535 1536 1537 1538 1539
  for (int j = 0; j < 2; ++j) {
    std::unique_ptr<InternalIterator> iter(
        c.GetTableReader()->NewRangeTombstoneIterator(ReadOptions()));
    if (j > 0) {
      // For second iteration, delete the table reader object and verify the
      // iterator can still access its metablock's range tombstones.
      c.ResetTableReader();
    }
A
Andrew Kryczka 已提交
1540
    ASSERT_FALSE(iter->Valid());
1541
    iter->SeekToFirst();
A
Andrew Kryczka 已提交
1542
    ASSERT_TRUE(iter->Valid());
1543
    for (size_t i = 0; i < expected_tombstones.size(); i++) {
1544 1545 1546 1547
      ASSERT_TRUE(iter->Valid());
      ParsedInternalKey parsed_key;
      ASSERT_TRUE(ParseInternalKey(iter->key(), &parsed_key));
      RangeTombstone t(parsed_key, iter->value());
1548 1549 1550 1551
      const auto& expected_t = expected_tombstones[i];
      ASSERT_EQ(t.start_key_, expected_t.start_key_);
      ASSERT_EQ(t.end_key_, expected_t.end_key_);
      ASSERT_EQ(t.seq_, expected_t.seq_);
1552 1553 1554 1555
      iter->Next();
    }
    ASSERT_TRUE(!iter->Valid());
  }
1556 1557
}

1558
TEST_P(BlockBasedTableTest, FilterPolicyNameProperties) {
1559
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1560 1561
  c.Add("a1", "val1");
  std::vector<std::string> keys;
1562
  stl_wrappers::KVMap kvmap;
1563
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1564
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1565
  Options options;
1566
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1567

L
Lei Jin 已提交
1568
  const ImmutableCFOptions ioptions(options);
1569 1570
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
1571
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1572
  auto& props = *c.GetTableReader()->GetTableProperties();
K
kailiu 已提交
1573
  ASSERT_EQ("rocksdb.BuiltinBloomFilter", props.filter_policy_name);
1574
  c.ResetTableReader();
1575 1576
}

1577 1578 1579 1580
//
// BlockBasedTableTest::PrefetchTest
//
void AssertKeysInCache(BlockBasedTable* table_reader,
1581
                       const std::vector<std::string>& keys_in_cache,
1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599
                       const std::vector<std::string>& keys_not_in_cache,
                       bool convert = false) {
  if (convert) {
    for (auto key : keys_in_cache) {
      InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
      ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
    }
    for (auto key : keys_not_in_cache) {
      InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
      ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
    }
  } else {
    for (auto key : keys_in_cache) {
      ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key));
    }
    for (auto key : keys_not_in_cache) {
      ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key));
    }
1600 1601 1602 1603
  }
}

void PrefetchRange(TableConstructor* c, Options* opt,
1604
                   BlockBasedTableOptions* table_options, const char* key_begin,
1605 1606 1607
                   const char* key_end,
                   const std::vector<std::string>& keys_in_cache,
                   const std::vector<std::string>& keys_not_in_cache,
1608 1609
                   const Status expected_status = Status::OK()) {
  // reset the cache and reopen the table
1610
  table_options->block_cache = NewLRUCache(16 * 1024 * 1024, 4);
1611 1612
  opt->table_factory.reset(NewBlockBasedTableFactory(*table_options));
  const ImmutableCFOptions ioptions2(*opt);
1613 1614
  const MutableCFOptions moptions(*opt);
  ASSERT_OK(c->Reopen(ioptions2, moptions));
1615 1616 1617

  // prefetch
  auto* table_reader = dynamic_cast<BlockBasedTable*>(c->GetTableReader());
1618
  Status s;
1619 1620
  std::unique_ptr<Slice> begin, end;
  std::unique_ptr<InternalKey> i_begin, i_end;
1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638
  if (key_begin != nullptr) {
    if (c->ConvertToInternalKey()) {
      i_begin.reset(new InternalKey(key_begin, kMaxSequenceNumber, kTypeValue));
      begin.reset(new Slice(i_begin->Encode()));
    } else {
      begin.reset(new Slice(key_begin));
    }
  }
  if (key_end != nullptr) {
    if (c->ConvertToInternalKey()) {
      i_end.reset(new InternalKey(key_end, kMaxSequenceNumber, kTypeValue));
      end.reset(new Slice(i_end->Encode()));
    } else {
      end.reset(new Slice(key_end));
    }
  }
  s = table_reader->Prefetch(begin.get(), end.get());

1639 1640 1641
  ASSERT_TRUE(s.code() == expected_status.code());

  // assert our expectation in cache warmup
1642 1643
  AssertKeysInCache(table_reader, keys_in_cache, keys_not_in_cache,
                    c->ConvertToInternalKey());
1644
  c->ResetTableReader();
1645 1646
}

1647
TEST_P(BlockBasedTableTest, PrefetchTest) {
1648 1649 1650
  // The purpose of this test is to test the prefetching operation built into
  // BlockBasedTable.
  Options opt;
1651
  std::unique_ptr<InternalKeyComparator> ikc;
1652 1653
  ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
  opt.compression = kNoCompression;
1654
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1655 1656
  table_options.block_size = 1024;
  // big enough so we don't ever lose cached values.
1657
  table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
1658 1659
  opt.table_factory.reset(NewBlockBasedTableFactory(table_options));

1660
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1661 1662 1663 1664 1665 1666 1667 1668
  c.Add("k01", "hello");
  c.Add("k02", "hello2");
  c.Add("k03", std::string(10000, 'x'));
  c.Add("k04", std::string(200000, 'x'));
  c.Add("k05", std::string(300000, 'x'));
  c.Add("k06", "hello3");
  c.Add("k07", std::string(100000, 'x'));
  std::vector<std::string> keys;
1669
  stl_wrappers::KVMap kvmap;
1670
  const ImmutableCFOptions ioptions(opt);
1671 1672
  const MutableCFOptions moptions(opt);
  c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
1673
  c.ResetTableReader();
1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685

  // We get the following data spread :
  //
  // Data block         Index
  // ========================
  // [ k01 k02 k03 ]    k03
  // [ k04         ]    k04
  // [ k05         ]    k05
  // [ k06 k07     ]    k07


  // Simple
1686 1687 1688 1689 1690
  PrefetchRange(&c, &opt, &table_options,
                /*key_range=*/"k01", "k05",
                /*keys_in_cache=*/{"k01", "k02", "k03", "k04", "k05"},
                /*keys_not_in_cache=*/{"k06", "k07"});
  PrefetchRange(&c, &opt, &table_options, "k01", "k01", {"k01", "k02", "k03"},
1691 1692
                {"k04", "k05", "k06", "k07"});
  // odd
1693 1694 1695
  PrefetchRange(&c, &opt, &table_options, "a", "z",
                {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
  PrefetchRange(&c, &opt, &table_options, "k00", "k00", {"k01", "k02", "k03"},
1696 1697
                {"k04", "k05", "k06", "k07"});
  // Edge cases
1698 1699 1700 1701
  PrefetchRange(&c, &opt, &table_options, "k00", "k06",
                {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
  PrefetchRange(&c, &opt, &table_options, "k00", "zzz",
                {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1702
  // null keys
1703 1704 1705 1706 1707 1708
  PrefetchRange(&c, &opt, &table_options, nullptr, nullptr,
                {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
  PrefetchRange(&c, &opt, &table_options, "k04", nullptr,
                {"k04", "k05", "k06", "k07"}, {"k01", "k02", "k03"});
  PrefetchRange(&c, &opt, &table_options, nullptr, "k05",
                {"k01", "k02", "k03", "k04", "k05"}, {"k06", "k07"});
1709
  // invalid
1710
  PrefetchRange(&c, &opt, &table_options, "k06", "k00", {}, {},
1711
                Status::InvalidArgument(Slice("k06 "), Slice("k07")));
1712
  c.ResetTableReader();
1713 1714
}

1715 1716
TEST_P(BlockBasedTableTest, TotalOrderSeekOnHashIndex) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1717
  for (int i = 0; i <= 5; ++i) {
1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746
    Options options;
    // Make each key/value an individual block
    table_options.block_size = 64;
    switch (i) {
    case 0:
      // Binary search index
      table_options.index_type = BlockBasedTableOptions::kBinarySearch;
      options.table_factory.reset(new BlockBasedTableFactory(table_options));
      break;
    case 1:
      // Hash search index
      table_options.index_type = BlockBasedTableOptions::kHashSearch;
      options.table_factory.reset(new BlockBasedTableFactory(table_options));
      options.prefix_extractor.reset(NewFixedPrefixTransform(4));
      break;
    case 2:
      // Hash search index with hash_index_allow_collision
      table_options.index_type = BlockBasedTableOptions::kHashSearch;
      table_options.hash_index_allow_collision = true;
      options.table_factory.reset(new BlockBasedTableFactory(table_options));
      options.prefix_extractor.reset(NewFixedPrefixTransform(4));
      break;
    case 3:
      // Hash search index with filter policy
      table_options.index_type = BlockBasedTableOptions::kHashSearch;
      table_options.filter_policy.reset(NewBloomFilterPolicy(10));
      options.table_factory.reset(new BlockBasedTableFactory(table_options));
      options.prefix_extractor.reset(NewFixedPrefixTransform(4));
      break;
M
Maysam Yabandeh 已提交
1747
    case 4:
1748
      // Two-level index
M
Maysam Yabandeh 已提交
1749 1750 1751
      table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
      options.table_factory.reset(new BlockBasedTableFactory(table_options));
      break;
1752 1753 1754 1755 1756 1757
    case 5:
      // Binary search with first key
      table_options.index_type =
          BlockBasedTableOptions::kBinarySearchWithFirstKey;
      options.table_factory.reset(new BlockBasedTableFactory(table_options));
      break;
1758 1759
    }

1760 1761
    TableConstructor c(BytewiseComparator(),
                       true /* convert_to_internal_key_ */);
1762 1763 1764 1765 1766 1767 1768 1769
    c.Add("aaaa1", std::string('a', 56));
    c.Add("bbaa1", std::string('a', 56));
    c.Add("cccc1", std::string('a', 56));
    c.Add("bbbb1", std::string('a', 56));
    c.Add("baaa1", std::string('a', 56));
    c.Add("abbb1", std::string('a', 56));
    c.Add("cccc2", std::string('a', 56));
    std::vector<std::string> keys;
1770
    stl_wrappers::KVMap kvmap;
L
Lei Jin 已提交
1771
    const ImmutableCFOptions ioptions(options);
1772 1773
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
1774 1775 1776 1777 1778 1779
             GetPlainInternalComparator(options.comparator), &keys, &kvmap);
    auto props = c.GetTableReader()->GetTableProperties();
    ASSERT_EQ(7u, props->num_data_blocks);
    auto* reader = c.GetTableReader();
    ReadOptions ro;
    ro.total_order_seek = true;
1780 1781 1782
    std::unique_ptr<InternalIterator> iter(reader->NewIterator(
        ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));
1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812

    iter->Seek(InternalKey("b", 0, kTypeValue).Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("baaa1", ExtractUserKey(iter->key()).ToString());
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());

    iter->Seek(InternalKey("bb", 0, kTypeValue).Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());

    iter->Seek(InternalKey("bbb", 0, kTypeValue).Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("cccc1", ExtractUserKey(iter->key()).ToString());
  }
}

1813 1814
TEST_P(BlockBasedTableTest, NoopTransformSeek) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));

  Options options;
  options.comparator = BytewiseComparator();
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewNoopTransform());

  TableConstructor c(options.comparator);
  // To tickle the PrefixMayMatch bug it is important that the
  // user-key is a single byte so that the index key exactly matches
  // the user-key.
  InternalKey key("a", 1, kTypeValue);
  c.Add(key.Encode().ToString(), "b");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableCFOptions ioptions(options);
1831
  const MutableCFOptions moptions(options);
1832
  const InternalKeyComparator internal_comparator(options.comparator);
1833 1834
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);
1835 1836 1837 1838 1839

  auto* reader = c.GetTableReader();
  for (int i = 0; i < 2; ++i) {
    ReadOptions ro;
    ro.total_order_seek = (i == 0);
1840 1841 1842
    std::unique_ptr<InternalIterator> iter(reader->NewIterator(
        ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));
1843 1844 1845 1846 1847 1848 1849 1850

    iter->Seek(key.Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("a", ExtractUserKey(iter->key()).ToString());
  }
}

1851
TEST_P(BlockBasedTableTest, SkipPrefixBloomFilter) {
A
Aaron Gao 已提交
1852 1853
  // if DB is opened with a prefix extractor of a different name,
  // prefix bloom is skipped when read the file
1854
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
A
Aaron Gao 已提交
1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868
  table_options.filter_policy.reset(NewBloomFilterPolicy(2));
  table_options.whole_key_filtering = false;

  Options options;
  options.comparator = BytewiseComparator();
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));

  TableConstructor c(options.comparator);
  InternalKey key("abcdefghijk", 1, kTypeValue);
  c.Add(key.Encode().ToString(), "test");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableCFOptions ioptions(options);
1869
  const MutableCFOptions moptions(options);
A
Aaron Gao 已提交
1870
  const InternalKeyComparator internal_comparator(options.comparator);
1871 1872 1873
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);
  // TODO(Zhongyi): update test to use MutableCFOptions
A
Aaron Gao 已提交
1874 1875
  options.prefix_extractor.reset(NewFixedPrefixTransform(9));
  const ImmutableCFOptions new_ioptions(options);
1876 1877
  const MutableCFOptions new_moptions(options);
  c.Reopen(new_ioptions, new_moptions);
A
Aaron Gao 已提交
1878
  auto reader = c.GetTableReader();
1879 1880 1881
  std::unique_ptr<InternalIterator> db_iter(reader->NewIterator(
      ReadOptions(), new_moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
A
Aaron Gao 已提交
1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893

  // Test point lookup
  // only one kv
  for (auto& kv : kvmap) {
    db_iter->Seek(kv.first);
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_OK(db_iter->status());
    ASSERT_EQ(db_iter->key(), kv.first);
    ASSERT_EQ(db_iter->value(), kv.second);
  }
}

K
Kai Liu 已提交
1894 1895 1896 1897 1898 1899
static std::string RandomString(Random* rnd, int len) {
  std::string r;
  test::RandomString(rnd, len, &r);
  return r;
}

1900
void AddInternalKey(TableConstructor* c, const std::string& prefix,
1901
                    std::string value = "v", int /*suffix_len*/ = 800) {
1902 1903
  static Random rnd(1023);
  InternalKey k(prefix + RandomString(&rnd, 800), 0, kTypeValue);
1904
  c->Add(k.Encode().ToString(), value);
1905 1906
}

M
Maysam Yabandeh 已提交
1907
void TableTest::IndexTest(BlockBasedTableOptions table_options) {
1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927
  TableConstructor c(BytewiseComparator());

  // keys with prefix length 3, make sure the key/value is big enough to fill
  // one block
  AddInternalKey(&c, "0015");
  AddInternalKey(&c, "0035");

  AddInternalKey(&c, "0054");
  AddInternalKey(&c, "0055");

  AddInternalKey(&c, "0056");
  AddInternalKey(&c, "0057");

  AddInternalKey(&c, "0058");
  AddInternalKey(&c, "0075");

  AddInternalKey(&c, "0076");
  AddInternalKey(&c, "0095");

  std::vector<std::string> keys;
1928
  stl_wrappers::KVMap kvmap;
1929
  Options options;
1930 1931
  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  table_options.block_size = 1700;
1932
  table_options.block_cache = NewLRUCache(1024, 4);
1933
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1934 1935 1936

  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
L
Lei Jin 已提交
1937
  const ImmutableCFOptions ioptions(options);
1938 1939 1940
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
1941
  auto reader = c.GetTableReader();
1942

1943
  auto props = reader->GetTableProperties();
1944 1945
  ASSERT_EQ(5u, props->num_data_blocks);

1946
  // TODO(Zhongyi): update test to use MutableCFOptions
1947 1948 1949
  std::unique_ptr<InternalIterator> index_iter(reader->NewIterator(
      ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
1950 1951 1952

  // -- Find keys do not exist, but have common prefix.
  std::vector<std::string> prefixes = {"001", "003", "005", "007", "009"};
1953 1954 1955
  std::vector<std::string> lower_bound = {
      keys[0], keys[1], keys[2], keys[7], keys[9],
  };
1956 1957 1958

  // find the lower bound of the prefix
  for (size_t i = 0; i < prefixes.size(); ++i) {
M
Maysam Yabandeh 已提交
1959 1960 1961
    index_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode());
    ASSERT_OK(index_iter->status());
    ASSERT_TRUE(index_iter->Valid());
1962 1963

    // seek the first element in the block
M
Maysam Yabandeh 已提交
1964 1965
    ASSERT_EQ(lower_bound[i], index_iter->key().ToString());
    ASSERT_EQ("v", index_iter->value().ToString());
1966 1967 1968 1969 1970 1971 1972 1973
  }

  // find the upper bound of prefixes
  std::vector<std::string> upper_bound = {keys[1], keys[2], keys[7], keys[9], };

  // find existing keys
  for (const auto& item : kvmap) {
    auto ukey = ExtractUserKey(item.first).ToString();
M
Maysam Yabandeh 已提交
1974
    index_iter->Seek(ukey);
1975 1976

    // ASSERT_OK(regular_iter->status());
M
Maysam Yabandeh 已提交
1977
    ASSERT_OK(index_iter->status());
1978 1979

    // ASSERT_TRUE(regular_iter->Valid());
M
Maysam Yabandeh 已提交
1980
    ASSERT_TRUE(index_iter->Valid());
1981

M
Maysam Yabandeh 已提交
1982 1983
    ASSERT_EQ(item.first, index_iter->key().ToString());
    ASSERT_EQ(item.second, index_iter->value().ToString());
1984 1985 1986 1987 1988
  }

  for (size_t i = 0; i < prefixes.size(); ++i) {
    // the key is greater than any existing keys.
    auto key = prefixes[i] + "9";
M
Maysam Yabandeh 已提交
1989
    index_iter->Seek(InternalKey(key, 0, kTypeValue).Encode());
1990

1991 1992
    ASSERT_TRUE(index_iter->status().ok() || index_iter->status().IsNotFound());
    ASSERT_TRUE(!index_iter->status().IsNotFound() || !index_iter->Valid());
1993 1994
    if (i == prefixes.size() - 1) {
      // last key
M
Maysam Yabandeh 已提交
1995
      ASSERT_TRUE(!index_iter->Valid());
1996
    } else {
M
Maysam Yabandeh 已提交
1997
      ASSERT_TRUE(index_iter->Valid());
1998
      // seek the first element in the block
M
Maysam Yabandeh 已提交
1999 2000
      ASSERT_EQ(upper_bound[i], index_iter->key().ToString());
      ASSERT_EQ("v", index_iter->value().ToString());
2001 2002 2003 2004 2005 2006
    }
  }

  // find keys with prefix that don't match any of the existing prefixes.
  std::vector<std::string> non_exist_prefixes = {"002", "004", "006", "008"};
  for (const auto& prefix : non_exist_prefixes) {
M
Maysam Yabandeh 已提交
2007
    index_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode());
2008 2009
    // regular_iter->Seek(prefix);

M
Maysam Yabandeh 已提交
2010
    ASSERT_OK(index_iter->status());
2011 2012
    // Seek to non-existing prefixes should yield either invalid, or a
    // key with prefix greater than the target.
M
Maysam Yabandeh 已提交
2013 2014
    if (index_iter->Valid()) {
      Slice ukey = ExtractUserKey(index_iter->key());
2015 2016 2017
      Slice ukey_prefix = options.prefix_extractor->Transform(ukey);
      ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) < 0);
    }
2018
  }
2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031
  for (const auto& prefix : non_exist_prefixes) {
    index_iter->SeekForPrev(InternalKey(prefix, 0, kTypeValue).Encode());
    // regular_iter->Seek(prefix);

    ASSERT_OK(index_iter->status());
    // Seek to non-existing prefixes should yield either invalid, or a
    // key with prefix greater than the target.
    if (index_iter->Valid()) {
      Slice ukey = ExtractUserKey(index_iter->key());
      Slice ukey_prefix = options.prefix_extractor->Transform(ukey);
      ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) > 0);
    }
  }
2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105

  {
    // Test reseek case. It should impact partitioned index more.
    ReadOptions ro;
    ro.total_order_seek = true;
    std::unique_ptr<InternalIterator> index_iter2(reader->NewIterator(
        ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));

    // Things to cover in partitioned index:
    // 1. Both of Seek() and SeekToLast() has optimization to prevent
    //    rereek leaf index block if it remains to the same one, and
    //    they reuse the same variable.
    // 2. When Next() or Prev() is called, the block moves, so the
    //    optimization should kick in only with the current one.
    index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4));

    index_iter2->SeekToLast();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4));

    index_iter2->SeekToLast();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));

    index_iter2->SeekToLast();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Seek(InternalKey("0075", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Next();
    ASSERT_TRUE(index_iter2->Valid());
    index_iter2->Next();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));

    index_iter2->SeekToLast();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
  }

2106
  c.ResetTableReader();
2107 2108
}

2109 2110
TEST_P(BlockBasedTableTest, BinaryIndexTest) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
M
Maysam Yabandeh 已提交
2111 2112 2113 2114
  table_options.index_type = BlockBasedTableOptions::kBinarySearch;
  IndexTest(table_options);
}

2115 2116
TEST_P(BlockBasedTableTest, HashIndexTest) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
M
Maysam Yabandeh 已提交
2117 2118 2119 2120
  table_options.index_type = BlockBasedTableOptions::kHashSearch;
  IndexTest(table_options);
}

2121
TEST_P(BlockBasedTableTest, PartitionIndexTest) {
M
Maysam Yabandeh 已提交
2122
  const int max_index_keys = 5;
M
Maysam Yabandeh 已提交
2123 2124 2125
  const int est_max_index_key_value_size = 32;
  const int est_max_index_size = max_index_keys * est_max_index_key_value_size;
  for (int i = 1; i <= est_max_index_size + 1; i++) {
2126
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
M
Maysam Yabandeh 已提交
2127
    table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
M
Maysam Yabandeh 已提交
2128
    table_options.metadata_block_size = i;
M
Maysam Yabandeh 已提交
2129 2130 2131 2132
    IndexTest(table_options);
  }
}

2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153
TEST_P(BlockBasedTableTest, IndexSeekOptimizationIncomplete) {
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);

  TableConstructor c(BytewiseComparator());
  AddInternalKey(&c, "pika");

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
  ASSERT_EQ(1, keys.size());

  auto reader = c.GetTableReader();
  ReadOptions ropt;
  ropt.read_tier = ReadTier::kBlockCacheTier;
2154 2155 2156
  std::unique_ptr<InternalIterator> iter(reader->NewIterator(
      ropt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171

  auto ikey = [](Slice user_key) {
    return InternalKey(user_key, 0, kTypeValue).Encode().ToString();
  };

  iter->Seek(ikey("pika"));
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->status().IsIncomplete());

  // This used to crash at some point.
  iter->Seek(ikey("pika"));
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->status().IsIncomplete());
}

2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257
TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey1) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey;
  IndexTest(table_options);
}

class CustomFlushBlockPolicy : public FlushBlockPolicyFactory,
                               public FlushBlockPolicy {
 public:
  explicit CustomFlushBlockPolicy(std::vector<int> keys_per_block)
      : keys_per_block_(keys_per_block) {}

  const char* Name() const override { return "table_test"; }
  FlushBlockPolicy* NewFlushBlockPolicy(const BlockBasedTableOptions&,
                                        const BlockBuilder&) const override {
    return new CustomFlushBlockPolicy(keys_per_block_);
  }

  bool Update(const Slice&, const Slice&) override {
    if (keys_in_current_block_ >= keys_per_block_.at(current_block_idx_)) {
      ++current_block_idx_;
      keys_in_current_block_ = 1;
      return true;
    }

    ++keys_in_current_block_;
    return false;
  }

  std::vector<int> keys_per_block_;

  int current_block_idx_ = 0;
  int keys_in_current_block_ = 0;
};

TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey2) {
  for (int use_first_key = 0; use_first_key < 2; ++use_first_key) {
    SCOPED_TRACE("use_first_key = " + std::to_string(use_first_key));
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
    table_options.index_type =
        use_first_key ? BlockBasedTableOptions::kBinarySearchWithFirstKey
                      : BlockBasedTableOptions::kBinarySearch;
    table_options.block_cache = NewLRUCache(10000);  // fits all blocks
    table_options.index_shortening =
        BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
    table_options.flush_block_policy_factory =
        std::make_shared<CustomFlushBlockPolicy>(std::vector<int>{2, 1, 3, 2});
    Options options;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    options.statistics = CreateDBStatistics();
    Statistics* stats = options.statistics.get();
    std::unique_ptr<InternalKeyComparator> comparator(
        new InternalKeyComparator(BytewiseComparator()));
    const ImmutableCFOptions ioptions(options);
    const MutableCFOptions moptions(options);

    TableConstructor c(BytewiseComparator());

    // Block 0.
    AddInternalKey(&c, "aaaa", "v0");
    AddInternalKey(&c, "aaac", "v1");

    // Block 1.
    AddInternalKey(&c, "aaca", "v2");

    // Block 2.
    AddInternalKey(&c, "caaa", "v3");
    AddInternalKey(&c, "caac", "v4");
    AddInternalKey(&c, "caae", "v5");

    // Block 3.
    AddInternalKey(&c, "ccaa", "v6");
    AddInternalKey(&c, "ccac", "v7");

    // Write the file.
    std::vector<std::string> keys;
    stl_wrappers::KVMap kvmap;
    c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
             &kvmap);
    ASSERT_EQ(8, keys.size());

    auto reader = c.GetTableReader();
    auto props = reader->GetTableProperties();
    ASSERT_EQ(4u, props->num_data_blocks);
    std::unique_ptr<InternalIterator> iter(reader->NewIterator(
        ReadOptions(), /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
2258 2259
        /*skip_filters=*/false, TableReaderCaller::kUncategorized,
        /*compaction_readahead_size=*/0, /*allow_unprepared_value=*/true));
2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275

    // Shouldn't have read data blocks before iterator is seeked.
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    auto ikey = [](Slice user_key) {
      return InternalKey(user_key, 0, kTypeValue).Encode().ToString();
    };

    // Seek to a key between blocks. If index contains first key, we shouldn't
    // read any data blocks until value is requested.
    iter->Seek(ikey("aaba"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[2], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 0 : 1,
              stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2276
    ASSERT_TRUE(iter->PrepareValue());
2277 2278 2279 2280 2281 2282 2283 2284 2285 2286
    EXPECT_EQ("v2", iter->value().ToString());
    EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to the middle of a block. The block should be read right away.
    iter->Seek(ikey("caab"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[4], iter->key().ToString());
    EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2287
    ASSERT_TRUE(iter->PrepareValue());
2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302
    EXPECT_EQ("v4", iter->value().ToString());
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to just before the same block and don't access value.
    // The iterator should keep pinning the block contents.
    iter->Seek(ikey("baaa"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[3], iter->key().ToString());
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to the same block again to check that the block is still pinned.
    iter->Seek(ikey("caae"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[5], iter->key().ToString());
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2303
    ASSERT_TRUE(iter->PrepareValue());
2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320
    EXPECT_EQ("v5", iter->value().ToString());
    EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Step forward and fall through to the next block. Don't access value.
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[6], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 3,
              stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Step forward again. Block should be read.
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[7], iter->key().ToString());
    EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2321
    ASSERT_TRUE(iter->PrepareValue());
2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342
    EXPECT_EQ("v7", iter->value().ToString());
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Step forward and reach the end.
    iter->Next();
    EXPECT_FALSE(iter->Valid());
    EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
    EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to a single-key block and step forward without accessing value.
    iter->Seek(ikey("aaca"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[2], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 0 : 1,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[3], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 1 : 2,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2343
    ASSERT_TRUE(iter->PrepareValue());
2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362
    EXPECT_EQ("v3", iter->value().ToString());
    EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

    // Seek between blocks and step back without accessing value.
    iter->Seek(ikey("aaca"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[2], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 3,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

    iter->Prev();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[1], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 3,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
    // All blocks are in cache now, there'll be no more misses ever.
    EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2363
    ASSERT_TRUE(iter->PrepareValue());
2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390
    EXPECT_EQ("v1", iter->value().ToString());

    // Next into the next block again.
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[2], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 4,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Seek to first and step back without accessing value.
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[0], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 2 : 5,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    iter->Prev();
    EXPECT_FALSE(iter->Valid());
    EXPECT_EQ(use_first_key ? 2 : 5,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    // Do some SeekForPrev() and SeekToLast() just to cover all methods.
    iter->SeekForPrev(ikey("caad"));
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[4], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 3 : 6,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2391
    ASSERT_TRUE(iter->PrepareValue());
2392 2393 2394 2395 2396 2397 2398 2399 2400
    EXPECT_EQ("v4", iter->value().ToString());
    EXPECT_EQ(use_first_key ? 3 : 6,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    iter->SeekToLast();
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(keys[7], iter->key().ToString());
    EXPECT_EQ(use_first_key ? 4 : 7,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2401
    ASSERT_TRUE(iter->PrepareValue());
2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441
    EXPECT_EQ("v7", iter->value().ToString());
    EXPECT_EQ(use_first_key ? 4 : 7,
              stats->getTickerCount(BLOCK_CACHE_DATA_HIT));

    EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

    c.ResetTableReader();
  }
}

TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKeyGlobalSeqno) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey;
  table_options.block_cache = NewLRUCache(10000);
  Options options;
  options.statistics = CreateDBStatistics();
  Statistics* stats = options.statistics.get();
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);

  TableConstructor c(BytewiseComparator(), /* convert_to_internal_key */ false,
                     /* level */ -1, /* largest_seqno */ 42);

  c.Add(InternalKey("b", 0, kTypeValue).Encode().ToString(), "x");
  c.Add(InternalKey("c", 0, kTypeValue).Encode().ToString(), "y");

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
  ASSERT_EQ(2, keys.size());

  auto reader = c.GetTableReader();
  auto props = reader->GetTableProperties();
  ASSERT_EQ(1u, props->num_data_blocks);
  std::unique_ptr<InternalIterator> iter(reader->NewIterator(
      ReadOptions(), /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
2442 2443
      /*skip_filters=*/false, TableReaderCaller::kUncategorized,
      /*compaction_readahead_size=*/0, /*allow_unprepared_value=*/true));
2444 2445 2446 2447 2448 2449 2450 2451 2452

  iter->Seek(InternalKey("a", 0, kTypeValue).Encode().ToString());
  ASSERT_TRUE(iter->Valid());
  EXPECT_EQ(InternalKey("b", 42, kTypeValue).Encode().ToString(),
            iter->key().ToString());
  EXPECT_NE(keys[0], iter->key().ToString());
  // Key should have been served from index, without reading data blocks.
  EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

2453
  ASSERT_TRUE(iter->PrepareValue());
2454 2455 2456 2457 2458 2459 2460 2461 2462
  EXPECT_EQ("x", iter->value().ToString());
  EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
  EXPECT_EQ(InternalKey("b", 42, kTypeValue).Encode().ToString(),
            iter->key().ToString());

  c.ResetTableReader();
}

K
Kai Liu 已提交
2463 2464 2465
// It's very hard to figure out the index block size of a block accurately.
// To make sure we get the index size, we just make sure as key number
// grows, the filter block size also grows.
2466
TEST_P(BlockBasedTableTest, IndexSizeStat) {
K
Kai Liu 已提交
2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481
  uint64_t last_index_size = 0;

  // we need to use random keys since the pure human readable texts
  // may be well compressed, resulting insignifcant change of index
  // block size.
  Random rnd(test::RandomSeed());
  std::vector<std::string> keys;

  for (int i = 0; i < 100; ++i) {
    keys.push_back(RandomString(&rnd, 10000));
  }

  // Each time we load one more key to the table. the table index block
  // size is expected to be larger than last time's.
  for (size_t i = 1; i < keys.size(); ++i) {
2482 2483
    TableConstructor c(BytewiseComparator(),
                       true /* convert_to_internal_key_ */);
K
Kai Liu 已提交
2484 2485 2486 2487 2488
    for (size_t j = 0; j < i; ++j) {
      c.Add(keys[j], "val");
    }

    std::vector<std::string> ks;
2489
    stl_wrappers::KVMap kvmap;
2490
    Options options;
K
Kai Liu 已提交
2491
    options.compression = kNoCompression;
2492
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2493 2494
    table_options.block_restart_interval = 1;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
K
Kai Liu 已提交
2495

L
Lei Jin 已提交
2496
    const ImmutableCFOptions ioptions(options);
2497 2498
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
2499
             GetPlainInternalComparator(options.comparator), &ks, &kvmap);
2500
    auto index_size = c.GetTableReader()->GetTableProperties()->index_size;
K
Kai Liu 已提交
2501 2502
    ASSERT_GT(index_size, last_index_size);
    last_index_size = index_size;
2503
    c.ResetTableReader();
K
Kai Liu 已提交
2504 2505 2506
  }
}

2507
TEST_P(BlockBasedTableTest, NumBlockStat) {
K
Kai Liu 已提交
2508
  Random rnd(test::RandomSeed());
2509
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
K
Kai Liu 已提交
2510 2511
  Options options;
  options.compression = kNoCompression;
2512
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2513 2514 2515
  table_options.block_restart_interval = 1;
  table_options.block_size = 1000;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
K
Kai Liu 已提交
2516 2517 2518 2519 2520 2521 2522 2523

  for (int i = 0; i < 10; ++i) {
    // the key/val are slightly smaller than block size, so that each block
    // holds roughly one key/value pair.
    c.Add(RandomString(&rnd, 900), "val");
  }

  std::vector<std::string> ks;
2524
  stl_wrappers::KVMap kvmap;
L
Lei Jin 已提交
2525
  const ImmutableCFOptions ioptions(options);
2526 2527
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
2528
           GetPlainInternalComparator(options.comparator), &ks, &kvmap);
2529
  ASSERT_EQ(kvmap.size(),
2530
            c.GetTableReader()->GetTableProperties()->num_data_blocks);
2531
  c.ResetTableReader();
K
Kai Liu 已提交
2532 2533
}

2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556
TEST_P(BlockBasedTableTest, TracingGetTest) {
  TableConstructor c(BytewiseComparator());
  Options options;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  options.create_if_missing = true;
  table_options.block_cache = NewLRUCache(1024 * 1024, 0);
  table_options.cache_index_and_filter_blocks = true;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  SetupTracingTest(&c);
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  ImmutableCFOptions ioptions(options);
  MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
  std::string user_key = "k01";
  InternalKey internal_key(user_key, 0, kTypeValue);
  std::string encoded_key = internal_key.Encode().ToString();
  for (uint32_t i = 1; i <= 2; i++) {
    PinnableSlice value;
    GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                           GetContext::kNotFound, user_key, &value, nullptr,
2557 2558
                           nullptr, true, nullptr, nullptr, nullptr, nullptr,
                           nullptr, nullptr, /*tracing_get_id=*/i);
2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714
    get_perf_context()->Reset();
    ASSERT_OK(c.GetTableReader()->Get(ReadOptions(), encoded_key, &get_context,
                                      moptions.prefix_extractor.get()));
    ASSERT_EQ(get_context.State(), GetContext::kFound);
    ASSERT_EQ(value.ToString(), kDummyValue);
  }

  // Verify traces.
  std::vector<BlockCacheTraceRecord> expected_records;
  // The first two records should be prefetching index and filter blocks.
  BlockCacheTraceRecord record;
  record.block_type = TraceType::kBlockTraceIndexBlock;
  record.caller = TableReaderCaller::kPrefetch;
  record.is_cache_hit = Boolean::kFalse;
  record.no_insert = Boolean::kFalse;
  expected_records.push_back(record);
  record.block_type = TraceType::kBlockTraceFilterBlock;
  expected_records.push_back(record);
  // Then we should have three records for one index, one filter, and one data
  // block access.
  record.get_id = 1;
  record.block_type = TraceType::kBlockTraceIndexBlock;
  record.caller = TableReaderCaller::kUserGet;
  record.get_from_user_specified_snapshot = Boolean::kFalse;
  record.referenced_key = encoded_key;
  record.referenced_key_exist_in_block = Boolean::kTrue;
  record.is_cache_hit = Boolean::kTrue;
  expected_records.push_back(record);
  record.block_type = TraceType::kBlockTraceFilterBlock;
  expected_records.push_back(record);
  record.is_cache_hit = Boolean::kFalse;
  record.block_type = TraceType::kBlockTraceDataBlock;
  expected_records.push_back(record);
  // The second get should all observe cache hits.
  record.is_cache_hit = Boolean::kTrue;
  record.get_id = 2;
  record.block_type = TraceType::kBlockTraceIndexBlock;
  record.caller = TableReaderCaller::kUserGet;
  record.get_from_user_specified_snapshot = Boolean::kFalse;
  record.referenced_key = encoded_key;
  expected_records.push_back(record);
  record.block_type = TraceType::kBlockTraceFilterBlock;
  expected_records.push_back(record);
  record.block_type = TraceType::kBlockTraceDataBlock;
  expected_records.push_back(record);
  VerifyBlockAccessTrace(&c, expected_records);
  c.ResetTableReader();
}

TEST_P(BlockBasedTableTest, TracingApproximateOffsetOfTest) {
  TableConstructor c(BytewiseComparator());
  Options options;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  options.create_if_missing = true;
  table_options.block_cache = NewLRUCache(1024 * 1024, 0);
  table_options.cache_index_and_filter_blocks = true;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  SetupTracingTest(&c);
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  ImmutableCFOptions ioptions(options);
  MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
  for (uint32_t i = 1; i <= 2; i++) {
    std::string user_key = "k01";
    InternalKey internal_key(user_key, 0, kTypeValue);
    std::string encoded_key = internal_key.Encode().ToString();
    c.GetTableReader()->ApproximateOffsetOf(
        encoded_key, TableReaderCaller::kUserApproximateSize);
  }
  // Verify traces.
  std::vector<BlockCacheTraceRecord> expected_records;
  // The first two records should be prefetching index and filter blocks.
  BlockCacheTraceRecord record;
  record.block_type = TraceType::kBlockTraceIndexBlock;
  record.caller = TableReaderCaller::kPrefetch;
  record.is_cache_hit = Boolean::kFalse;
  record.no_insert = Boolean::kFalse;
  expected_records.push_back(record);
  record.block_type = TraceType::kBlockTraceFilterBlock;
  expected_records.push_back(record);
  // Then we should have two records for only index blocks.
  record.block_type = TraceType::kBlockTraceIndexBlock;
  record.caller = TableReaderCaller::kUserApproximateSize;
  record.is_cache_hit = Boolean::kTrue;
  expected_records.push_back(record);
  expected_records.push_back(record);
  VerifyBlockAccessTrace(&c, expected_records);
  c.ResetTableReader();
}

TEST_P(BlockBasedTableTest, TracingIterator) {
  TableConstructor c(BytewiseComparator());
  Options options;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  options.create_if_missing = true;
  table_options.block_cache = NewLRUCache(1024 * 1024, 0);
  table_options.cache_index_and_filter_blocks = true;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  SetupTracingTest(&c);
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  ImmutableCFOptions ioptions(options);
  MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);

  for (uint32_t i = 1; i <= 2; i++) {
    std::unique_ptr<InternalIterator> iter(c.GetTableReader()->NewIterator(
        ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUserIterator));
    iter->SeekToFirst();
    while (iter->Valid()) {
      iter->key();
      iter->value();
      iter->Next();
    }
    ASSERT_OK(iter->status());
    iter.reset();
  }

  // Verify traces.
  std::vector<BlockCacheTraceRecord> expected_records;
  // The first two records should be prefetching index and filter blocks.
  BlockCacheTraceRecord record;
  record.block_type = TraceType::kBlockTraceIndexBlock;
  record.caller = TableReaderCaller::kPrefetch;
  record.is_cache_hit = Boolean::kFalse;
  record.no_insert = Boolean::kFalse;
  expected_records.push_back(record);
  record.block_type = TraceType::kBlockTraceFilterBlock;
  expected_records.push_back(record);
  // Then we should have three records for index and two data block access.
  record.block_type = TraceType::kBlockTraceIndexBlock;
  record.caller = TableReaderCaller::kUserIterator;
  record.is_cache_hit = Boolean::kTrue;
  expected_records.push_back(record);
  record.block_type = TraceType::kBlockTraceDataBlock;
  record.is_cache_hit = Boolean::kFalse;
  expected_records.push_back(record);
  expected_records.push_back(record);
  // When we iterate this file for the second time, we should observe all cache
  // hits.
  record.block_type = TraceType::kBlockTraceIndexBlock;
  record.is_cache_hit = Boolean::kTrue;
  expected_records.push_back(record);
  record.block_type = TraceType::kBlockTraceDataBlock;
  expected_records.push_back(record);
  expected_records.push_back(record);
  VerifyBlockAccessTrace(&c, expected_records);
  c.ResetTableReader();
}

2715 2716
// A simple tool that takes the snapshot of block cache statistics.
class BlockCachePropertiesSnapshot {
K
Kai Liu 已提交
2717
 public:
2718
  explicit BlockCachePropertiesSnapshot(Statistics* statistics) {
I
Igor Canadi 已提交
2719 2720 2721 2722 2723 2724
    block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS);
    block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT);
    index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS);
    index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT);
    data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS);
    data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT);
2725 2726 2727
    filter_block_cache_miss =
        statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS);
    filter_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT);
2728 2729 2730
    block_cache_bytes_read = statistics->getTickerCount(BLOCK_CACHE_BYTES_READ);
    block_cache_bytes_write =
        statistics->getTickerCount(BLOCK_CACHE_BYTES_WRITE);
2731 2732
  }

I
Igor Canadi 已提交
2733 2734 2735 2736
  void AssertIndexBlockStat(int64_t expected_index_block_cache_miss,
                            int64_t expected_index_block_cache_hit) {
    ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
    ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
2737 2738
  }

I
Igor Canadi 已提交
2739 2740 2741 2742
  void AssertFilterBlockStat(int64_t expected_filter_block_cache_miss,
                             int64_t expected_filter_block_cache_hit) {
    ASSERT_EQ(expected_filter_block_cache_miss, filter_block_cache_miss);
    ASSERT_EQ(expected_filter_block_cache_hit, filter_block_cache_hit);
K
Kai Liu 已提交
2743 2744
  }

K
kailiu 已提交
2745
  // Check if the fetched props matches the expected ones.
2746
  // TODO(kailiu) Use this only when you disabled filter policy!
I
Igor Canadi 已提交
2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758
  void AssertEqual(int64_t expected_index_block_cache_miss,
                   int64_t expected_index_block_cache_hit,
                   int64_t expected_data_block_cache_miss,
                   int64_t expected_data_block_cache_hit) const {
    ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
    ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
    ASSERT_EQ(expected_data_block_cache_miss, data_block_cache_miss);
    ASSERT_EQ(expected_data_block_cache_hit, data_block_cache_hit);
    ASSERT_EQ(expected_index_block_cache_miss + expected_data_block_cache_miss,
              block_cache_miss);
    ASSERT_EQ(expected_index_block_cache_hit + expected_data_block_cache_hit,
              block_cache_hit);
K
Kai Liu 已提交
2759 2760
  }

2761 2762 2763 2764
  int64_t GetCacheBytesRead() { return block_cache_bytes_read; }

  int64_t GetCacheBytesWrite() { return block_cache_bytes_write; }

K
Kai Liu 已提交
2765
 private:
2766 2767 2768 2769 2770 2771
  int64_t block_cache_miss = 0;
  int64_t block_cache_hit = 0;
  int64_t index_block_cache_miss = 0;
  int64_t index_block_cache_hit = 0;
  int64_t data_block_cache_miss = 0;
  int64_t data_block_cache_hit = 0;
2772 2773
  int64_t filter_block_cache_miss = 0;
  int64_t filter_block_cache_hit = 0;
2774 2775
  int64_t block_cache_bytes_read = 0;
  int64_t block_cache_bytes_write = 0;
K
Kai Liu 已提交
2776 2777
};

2778 2779
// Make sure, by default, index/filter blocks were pre-loaded (meaning we won't
// use block cache to store them).
2780
TEST_P(BlockBasedTableTest, BlockCacheDisabledTest) {
2781 2782 2783
  Options options;
  options.create_if_missing = true;
  options.statistics = CreateDBStatistics();
2784
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2785
  table_options.block_cache = NewLRUCache(1024, 4);
2786
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));
2787 2788
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  std::vector<std::string> keys;
2789
  stl_wrappers::KVMap kvmap;
2790

2791
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2792
  c.Add("key", "value");
L
Lei Jin 已提交
2793
  const ImmutableCFOptions ioptions(options);
2794 2795
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
2796
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2797 2798

  // preloading filter/index blocks is enabled.
2799
  auto reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2800
  ASSERT_FALSE(reader->TEST_FilterBlockInCache());
2801
  ASSERT_FALSE(reader->TEST_IndexBlockInCache());
2802 2803 2804 2805 2806 2807 2808 2809 2810

  {
    // nothing happens in the beginning
    BlockCachePropertiesSnapshot props(options.statistics.get());
    props.AssertIndexBlockStat(0, 0);
    props.AssertFilterBlockStat(0, 0);
  }

  {
2811
    GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2812
                           GetContext::kNotFound, Slice(), nullptr, nullptr,
2813
                           nullptr, true, nullptr, nullptr);
2814
    // a hack that just to trigger BlockBasedTable::GetFilter.
2815 2816
    reader->Get(ReadOptions(), "non-exist-key", &get_context,
                moptions.prefix_extractor.get());
2817 2818 2819 2820 2821 2822 2823 2824
    BlockCachePropertiesSnapshot props(options.statistics.get());
    props.AssertIndexBlockStat(0, 0);
    props.AssertFilterBlockStat(0, 0);
  }
}

// Due to the difficulities of the intersaction between statistics, this test
// only tests the case when "index block is put to block cache"
2825
TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) {
K
Kai Liu 已提交
2826
  // -- Table construction
2827
  Options options;
K
Kai Liu 已提交
2828
  options.create_if_missing = true;
2829
  options.statistics = CreateDBStatistics();
2830 2831

  // Enable the cache for index/filter blocks
2832
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2833 2834 2835 2836 2837
  LRUCacheOptions co;
  co.capacity = 2048;
  co.num_shard_bits = 2;
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  table_options.block_cache = NewLRUCache(co);
2838 2839
  table_options.cache_index_and_filter_blocks = true;
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
K
Kai Liu 已提交
2840
  std::vector<std::string> keys;
2841
  stl_wrappers::KVMap kvmap;
K
Kai Liu 已提交
2842

2843
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
K
Kai Liu 已提交
2844
  c.Add("key", "value");
L
Lei Jin 已提交
2845
  const ImmutableCFOptions ioptions(options);
2846 2847
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
2848
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2849
  // preloading filter/index blocks is prohibited.
2850
  auto* reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2851
  ASSERT_FALSE(reader->TEST_FilterBlockInCache());
2852
  ASSERT_TRUE(reader->TEST_IndexBlockInCache());
K
Kai Liu 已提交
2853 2854 2855

  // -- PART 1: Open with regular block cache.
  // Since block_cache is disabled, no cache activities will be involved.
2856
  std::unique_ptr<InternalIterator> iter;
K
Kai Liu 已提交
2857

2858
  int64_t last_cache_bytes_read = 0;
K
Kai Liu 已提交
2859 2860
  // At first, no block will be accessed.
  {
2861
    BlockCachePropertiesSnapshot props(options.statistics.get());
K
Kai Liu 已提交
2862
    // index will be added to block cache.
2863 2864
    props.AssertEqual(1,  // index block miss
                      0, 0, 0);
2865 2866
    ASSERT_EQ(props.GetCacheBytesRead(), 0);
    ASSERT_EQ(props.GetCacheBytesWrite(),
2867
              static_cast<int64_t>(table_options.block_cache->GetUsage()));
2868
    last_cache_bytes_read = props.GetCacheBytesRead();
K
Kai Liu 已提交
2869 2870 2871 2872
  }

  // Only index block will be accessed
  {
2873
    iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
2874
    BlockCachePropertiesSnapshot props(options.statistics.get());
K
Kai Liu 已提交
2875 2876 2877
    // NOTE: to help better highlight the "detla" of each ticker, I use
    // <last_value> + <added_value> to indicate the increment of changed
    // value; other numbers remain the same.
2878 2879
    props.AssertEqual(1, 0 + 1,  // index block hit
                      0, 0);
2880 2881 2882
    // Cache hit, bytes read from cache should increase
    ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
    ASSERT_EQ(props.GetCacheBytesWrite(),
2883
              static_cast<int64_t>(table_options.block_cache->GetUsage()));
2884
    last_cache_bytes_read = props.GetCacheBytesRead();
K
Kai Liu 已提交
2885 2886 2887 2888 2889
  }

  // Only data block will be accessed
  {
    iter->SeekToFirst();
2890
    BlockCachePropertiesSnapshot props(options.statistics.get());
2891 2892
    props.AssertEqual(1, 1, 0 + 1,  // data block miss
                      0);
2893 2894 2895
    // Cache miss, Bytes read from cache should not change
    ASSERT_EQ(props.GetCacheBytesRead(), last_cache_bytes_read);
    ASSERT_EQ(props.GetCacheBytesWrite(),
2896
              static_cast<int64_t>(table_options.block_cache->GetUsage()));
2897
    last_cache_bytes_read = props.GetCacheBytesRead();
K
Kai Liu 已提交
2898 2899 2900 2901
  }

  // Data block will be in cache
  {
2902
    iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
K
Kai Liu 已提交
2903
    iter->SeekToFirst();
2904
    BlockCachePropertiesSnapshot props(options.statistics.get());
2905 2906
    props.AssertEqual(1, 1 + 1, /* index block hit */
                      1, 0 + 1 /* data block hit */);
2907 2908 2909
    // Cache hit, bytes read from cache should increase
    ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
    ASSERT_EQ(props.GetCacheBytesWrite(),
2910
              static_cast<int64_t>(table_options.block_cache->GetUsage()));
K
Kai Liu 已提交
2911 2912 2913
  }
  // release the iterator so that the block cache can reset correctly.
  iter.reset();
2914

2915 2916
  c.ResetTableReader();

2917
  // -- PART 2: Open with very small block cache
K
Kai Liu 已提交
2918 2919
  // In this test, no block will ever get hit since the block cache is
  // too small to fit even one entry.
2920
  table_options.block_cache = NewLRUCache(1, 4);
2921
  options.statistics = CreateDBStatistics();
2922
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
L
Lei Jin 已提交
2923
  const ImmutableCFOptions ioptions2(options);
2924 2925
  const MutableCFOptions moptions2(options);
  c.Reopen(ioptions2, moptions2);
K
Kai Liu 已提交
2926
  {
2927
    BlockCachePropertiesSnapshot props(options.statistics.get());
2928 2929
    props.AssertEqual(1,  // index block miss
                      0, 0, 0);
2930 2931
    // Cache miss, Bytes read from cache should not change
    ASSERT_EQ(props.GetCacheBytesRead(), 0);
K
Kai Liu 已提交
2932 2933 2934 2935 2936 2937
  }

  {
    // Both index and data block get accessed.
    // It first cache index block then data block. But since the cache size
    // is only 1, index block will be purged after data block is inserted.
2938
    iter.reset(c.NewIterator(moptions2.prefix_extractor.get()));
2939
    BlockCachePropertiesSnapshot props(options.statistics.get());
2940 2941 2942
    props.AssertEqual(1 + 1,  // index block miss
                      0, 0,   // data block miss
                      0);
2943 2944
    // Cache hit, bytes read from cache should increase
    ASSERT_EQ(props.GetCacheBytesRead(), 0);
K
Kai Liu 已提交
2945 2946 2947 2948 2949 2950
  }

  {
    // SeekToFirst() accesses data block. With similar reason, we expect data
    // block's cache miss.
    iter->SeekToFirst();
2951
    BlockCachePropertiesSnapshot props(options.statistics.get());
2952 2953
    props.AssertEqual(2, 0, 0 + 1,  // data block miss
                      0);
2954 2955
    // Cache miss, Bytes read from cache should not change
    ASSERT_EQ(props.GetCacheBytesRead(), 0);
K
Kai Liu 已提交
2956
  }
2957
  iter.reset();
2958
  c.ResetTableReader();
2959 2960

  // -- PART 3: Open table with bloom filter enabled but not in SST file
2961
  table_options.block_cache = NewLRUCache(4096, 4);
2962 2963 2964 2965
  table_options.cache_index_and_filter_blocks = false;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  TableConstructor c3(BytewiseComparator());
L
Lei Jin 已提交
2966 2967 2968
  std::string user_key = "k01";
  InternalKey internal_key(user_key, 0, kTypeValue);
  c3.Add(internal_key.Encode().ToString(), "hello");
2969
  ImmutableCFOptions ioptions3(options);
2970
  MutableCFOptions moptions3(options);
2971
  // Generate table without filter policy
2972
  c3.Finish(options, ioptions3, moptions3, table_options,
2973 2974 2975
            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
  c3.ResetTableReader();

2976 2977 2978
  // Open table with filter policy
  table_options.filter_policy.reset(NewBloomFilterPolicy(1));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
2979
  options.statistics = CreateDBStatistics();
2980
  ImmutableCFOptions ioptions4(options);
2981 2982
  MutableCFOptions moptions4(options);
  ASSERT_OK(c3.Reopen(ioptions4, moptions4));
2983
  reader = dynamic_cast<BlockBasedTable*>(c3.GetTableReader());
2984
  ASSERT_FALSE(reader->TEST_FilterBlockInCache());
M
Maysam Yabandeh 已提交
2985
  PinnableSlice value;
2986
  GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2987
                         GetContext::kNotFound, user_key, &value, nullptr,
2988
                         nullptr, true, nullptr, nullptr);
M
Maysam Yabandeh 已提交
2989
  ASSERT_OK(reader->Get(ReadOptions(), internal_key.Encode(), &get_context,
2990
                        moptions4.prefix_extractor.get()));
M
Maysam Yabandeh 已提交
2991
  ASSERT_STREQ(value.data(), "hello");
2992 2993
  BlockCachePropertiesSnapshot props(options.statistics.get());
  props.AssertFilterBlockStat(0, 0);
2994
  c3.ResetTableReader();
K
Kai Liu 已提交
2995 2996
}

2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020
void ValidateBlockSizeDeviation(int value, int expected) {
  BlockBasedTableOptions table_options;
  table_options.block_size_deviation = value;
  BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);

  const BlockBasedTableOptions* normalized_table_options =
      (const BlockBasedTableOptions*)factory->GetOptions();
  ASSERT_EQ(normalized_table_options->block_size_deviation, expected);

  delete factory;
}

void ValidateBlockRestartInterval(int value, int expected) {
  BlockBasedTableOptions table_options;
  table_options.block_restart_interval = value;
  BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);

  const BlockBasedTableOptions* normalized_table_options =
      (const BlockBasedTableOptions*)factory->GetOptions();
  ASSERT_EQ(normalized_table_options->block_restart_interval, expected);

  delete factory;
}

3021
TEST_P(BlockBasedTableTest, InvalidOptions) {
3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040
  // invalid values for block_size_deviation (<0 or >100) are silently set to 0
  ValidateBlockSizeDeviation(-10, 0);
  ValidateBlockSizeDeviation(-1, 0);
  ValidateBlockSizeDeviation(0, 0);
  ValidateBlockSizeDeviation(1, 1);
  ValidateBlockSizeDeviation(99, 99);
  ValidateBlockSizeDeviation(100, 100);
  ValidateBlockSizeDeviation(101, 0);
  ValidateBlockSizeDeviation(1000, 0);

  // invalid values for block_restart_interval (<1) are silently set to 1
  ValidateBlockRestartInterval(-10, 1);
  ValidateBlockRestartInterval(-1, 1);
  ValidateBlockRestartInterval(0, 1);
  ValidateBlockRestartInterval(1, 1);
  ValidateBlockRestartInterval(2, 2);
  ValidateBlockRestartInterval(1000, 1000);
}

3041
TEST_P(BlockBasedTableTest, BlockReadCountTest) {
I
Igor Canadi 已提交
3042 3043 3044 3045 3046 3047 3048 3049
  // bloom_filter_type = 0 -- block-based filter
  // bloom_filter_type = 0 -- full filter
  for (int bloom_filter_type = 0; bloom_filter_type < 2; ++bloom_filter_type) {
    for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2;
         ++index_and_filter_in_cache) {
      Options options;
      options.create_if_missing = true;

3050
      BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
I
Igor Canadi 已提交
3051 3052 3053 3054 3055 3056
      table_options.block_cache = NewLRUCache(1, 0);
      table_options.cache_index_and_filter_blocks = index_and_filter_in_cache;
      table_options.filter_policy.reset(
          NewBloomFilterPolicy(10, bloom_filter_type == 0));
      options.table_factory.reset(new BlockBasedTableFactory(table_options));
      std::vector<std::string> keys;
I
Igor Canadi 已提交
3057
      stl_wrappers::KVMap kvmap;
I
Igor Canadi 已提交
3058 3059 3060 3061 3062 3063 3064

      TableConstructor c(BytewiseComparator());
      std::string user_key = "k04";
      InternalKey internal_key(user_key, 0, kTypeValue);
      std::string encoded_key = internal_key.Encode().ToString();
      c.Add(encoded_key, "hello");
      ImmutableCFOptions ioptions(options);
3065
      MutableCFOptions moptions(options);
I
Igor Canadi 已提交
3066
      // Generate table with filter policy
3067
      c.Finish(options, ioptions, moptions, table_options,
I
Igor Canadi 已提交
3068 3069
               GetPlainInternalComparator(options.comparator), &keys, &kvmap);
      auto reader = c.GetTableReader();
M
Maysam Yabandeh 已提交
3070
      PinnableSlice value;
3071 3072 3073
      {
        GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                               GetContext::kNotFound, user_key, &value, nullptr,
3074
                               nullptr, true, nullptr, nullptr);
3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088
        get_perf_context()->Reset();
        ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
                              moptions.prefix_extractor.get()));
        if (index_and_filter_in_cache) {
          // data, index and filter block
          ASSERT_EQ(get_perf_context()->block_read_count, 3);
          ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
          ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
        } else {
          // just the data block
          ASSERT_EQ(get_perf_context()->block_read_count, 1);
        }
        ASSERT_EQ(get_context.State(), GetContext::kFound);
        ASSERT_STREQ(value.data(), "hello");
I
Igor Canadi 已提交
3089 3090 3091 3092 3093 3094 3095
      }

      // Get non-existing key
      user_key = "does-not-exist";
      internal_key = InternalKey(user_key, 0, kTypeValue);
      encoded_key = internal_key.Encode().ToString();

M
Maysam Yabandeh 已提交
3096
      value.Reset();
3097 3098
      {
        GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
I
Igor Canadi 已提交
3099
                               GetContext::kNotFound, user_key, &value, nullptr,
3100
                               nullptr, true, nullptr, nullptr);
3101 3102 3103 3104 3105
        get_perf_context()->Reset();
        ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
                              moptions.prefix_extractor.get()));
        ASSERT_EQ(get_context.State(), GetContext::kNotFound);
      }
I
Igor Canadi 已提交
3106 3107 3108 3109

      if (index_and_filter_in_cache) {
        if (bloom_filter_type == 0) {
          // with block-based, we read index and then the filter
3110
          ASSERT_EQ(get_perf_context()->block_read_count, 2);
3111 3112
          ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
          ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
I
Igor Canadi 已提交
3113 3114
        } else {
          // with full-filter, we read filter first and then we stop
3115
          ASSERT_EQ(get_perf_context()->block_read_count, 1);
3116
          ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
I
Igor Canadi 已提交
3117 3118 3119 3120
        }
      } else {
        // filter is already in memory and it figures out that the key doesn't
        // exist
3121
        ASSERT_EQ(get_perf_context()->block_read_count, 0);
I
Igor Canadi 已提交
3122 3123 3124 3125 3126
      }
    }
  }
}

3127
TEST_P(BlockBasedTableTest, BlockCacheLeak) {
K
Kai Liu 已提交
3128 3129 3130 3131 3132
  // Check that when we reopen a table we don't lose access to blocks already
  // in the cache. This test checks whether the Table actually makes use of the
  // unique ID from the file.

  Options opt;
3133
  std::unique_ptr<InternalKeyComparator> ikc;
3134
  ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
K
Kai Liu 已提交
3135
  opt.compression = kNoCompression;
3136
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
3137 3138
  table_options.block_size = 1024;
  // big enough so we don't ever lose cached values.
3139
  table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
3140
  opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
K
Kai Liu 已提交
3141

3142
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
K
Kai Liu 已提交
3143 3144 3145 3146 3147 3148 3149 3150
  c.Add("k01", "hello");
  c.Add("k02", "hello2");
  c.Add("k03", std::string(10000, 'x'));
  c.Add("k04", std::string(200000, 'x'));
  c.Add("k05", std::string(300000, 'x'));
  c.Add("k06", "hello3");
  c.Add("k07", std::string(100000, 'x'));
  std::vector<std::string> keys;
3151
  stl_wrappers::KVMap kvmap;
L
Lei Jin 已提交
3152
  const ImmutableCFOptions ioptions(opt);
3153 3154
  const MutableCFOptions moptions(opt);
  c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
K
Kai Liu 已提交
3155

3156
  std::unique_ptr<InternalIterator> iter(
3157
      c.NewIterator(moptions.prefix_extractor.get()));
K
Kai Liu 已提交
3158 3159 3160 3161 3162 3163 3164
  iter->SeekToFirst();
  while (iter->Valid()) {
    iter->key();
    iter->value();
    iter->Next();
  }
  ASSERT_OK(iter->status());
3165
  iter.reset();
K
Kai Liu 已提交
3166

L
Lei Jin 已提交
3167
  const ImmutableCFOptions ioptions1(opt);
3168 3169
  const MutableCFOptions moptions1(opt);
  ASSERT_OK(c.Reopen(ioptions1, moptions1));
3170
  auto table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
K
Kai Liu 已提交
3171
  for (const std::string& key : keys) {
M
Maysam Yabandeh 已提交
3172 3173
    InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
    ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
K
Kai Liu 已提交
3174
  }
3175
  c.ResetTableReader();
I
Igor Canadi 已提交
3176 3177

  // rerun with different block cache
3178
  table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
3179
  opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
L
Lei Jin 已提交
3180
  const ImmutableCFOptions ioptions2(opt);
3181 3182
  const MutableCFOptions moptions2(opt);
  ASSERT_OK(c.Reopen(ioptions2, moptions2));
3183
  table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
I
Igor Canadi 已提交
3184
  for (const std::string& key : keys) {
M
Maysam Yabandeh 已提交
3185 3186
    InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
    ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
I
Igor Canadi 已提交
3187
  }
3188
  c.ResetTableReader();
K
Kai Liu 已提交
3189 3190
}

3191
namespace {
Y
Yi Wu 已提交
3192
class CustomMemoryAllocator : public MemoryAllocator {
3193
 public:
3194
  const char* Name() const override { return "CustomMemoryAllocator"; }
3195 3196 3197 3198

  void* Allocate(size_t size) override {
    ++numAllocations;
    auto ptr = new char[size + 16];
Y
Yi Wu 已提交
3199
    memcpy(ptr, "memory_allocator_", 16);  // mangle first 16 bytes
3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212
    return reinterpret_cast<void*>(ptr + 16);
  }
  void Deallocate(void* p) override {
    ++numDeallocations;
    char* ptr = reinterpret_cast<char*>(p) - 16;
    delete[] ptr;
  }

  std::atomic<int> numAllocations;
  std::atomic<int> numDeallocations;
};
}  // namespace

Y
Yi Wu 已提交
3213 3214
TEST_P(BlockBasedTableTest, MemoryAllocator) {
  auto custom_memory_allocator = std::make_shared<CustomMemoryAllocator>();
3215 3216
  {
    Options opt;
3217
    std::unique_ptr<InternalKeyComparator> ikc;
3218 3219 3220 3221 3222
    ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
    opt.compression = kNoCompression;
    BlockBasedTableOptions table_options;
    table_options.block_size = 1024;
    LRUCacheOptions lruOptions;
3223
    lruOptions.memory_allocator = custom_memory_allocator;
3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243
    lruOptions.capacity = 16 * 1024 * 1024;
    lruOptions.num_shard_bits = 4;
    table_options.block_cache = NewLRUCache(std::move(lruOptions));
    opt.table_factory.reset(NewBlockBasedTableFactory(table_options));

    TableConstructor c(BytewiseComparator(),
                       true /* convert_to_internal_key_ */);
    c.Add("k01", "hello");
    c.Add("k02", "hello2");
    c.Add("k03", std::string(10000, 'x'));
    c.Add("k04", std::string(200000, 'x'));
    c.Add("k05", std::string(300000, 'x'));
    c.Add("k06", "hello3");
    c.Add("k07", std::string(100000, 'x'));
    std::vector<std::string> keys;
    stl_wrappers::KVMap kvmap;
    const ImmutableCFOptions ioptions(opt);
    const MutableCFOptions moptions(opt);
    c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);

3244
    std::unique_ptr<InternalIterator> iter(
3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256
        c.NewIterator(moptions.prefix_extractor.get()));
    iter->SeekToFirst();
    while (iter->Valid()) {
      iter->key();
      iter->value();
      iter->Next();
    }
    ASSERT_OK(iter->status());
  }

  // out of scope, block cache should have been deleted, all allocations
  // deallocated
Y
Yi Wu 已提交
3257 3258
  EXPECT_EQ(custom_memory_allocator->numAllocations.load(),
            custom_memory_allocator->numDeallocations.load());
3259
  // make sure that allocations actually happened through the cache allocator
Y
Yi Wu 已提交
3260
  EXPECT_GT(custom_memory_allocator->numAllocations.load(), 0);
3261 3262
}

3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289
// Test the file checksum of block based table
TEST_P(BlockBasedTableTest, NoFileChecksum) {
  Options options;
  ImmutableCFOptions ioptions(options);
  MutableCFOptions moptions(options);
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  int level = 0;
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  std::string column_family_name;

  FileChecksumTestHelper f(true);
  f.CreateWriteableFile();
  std::unique_ptr<TableBuilder> builder;
  builder.reset(ioptions.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, *comparator,
                          &int_tbl_prop_collector_factories,
                          options.compression, options.sample_for_compression,
                          options.compression_opts, false /* skip_filters */,
                          column_family_name, level),
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      f.GetFileWriter()));
  f.ResetTableBuilder(std::move(builder));
  f.AddKVtoKVMap(1000);
  f.WriteKVAndFlushTable();
3290 3291
  ASSERT_STREQ(f.GetFileChecksumFuncName(), kUnknownFileChecksumFuncName);
  ASSERT_STREQ(f.GetFileChecksum().c_str(), kUnknownFileChecksum);
3292 3293
}

3294
TEST_P(BlockBasedTableTest, Crc32cFileChecksum) {
3295 3296
  FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
      new FileChecksumGenCrc32cFactory();
3297
  Options options;
3298
  options.file_checksum_gen_factory.reset(file_checksum_gen_factory);
3299 3300 3301 3302 3303 3304 3305 3306 3307 3308
  ImmutableCFOptions ioptions(options);
  MutableCFOptions moptions(options);
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  int level = 0;
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  std::string column_family_name;

3309 3310
  FileChecksumGenContext gen_context;
  gen_context.file_name = "db/tmp";
3311
  std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen1 =
3312 3313
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
3314 3315
  FileChecksumTestHelper f(true);
  f.CreateWriteableFile();
3316
  f.SetFileChecksumGenerator(checksum_crc32c_gen1.release());
3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329
  std::unique_ptr<TableBuilder> builder;
  builder.reset(ioptions.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, *comparator,
                          &int_tbl_prop_collector_factories,
                          options.compression, options.sample_for_compression,
                          options.compression_opts, false /* skip_filters */,
                          column_family_name, level),
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      f.GetFileWriter()));
  f.ResetTableBuilder(std::move(builder));
  f.AddKVtoKVMap(1000);
  f.WriteKVAndFlushTable();
  ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");
3330

3331
  std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen2 =
3332 3333
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
3334
  std::string checksum;
3335
  ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum));
3336
  ASSERT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str());
3337 3338 3339 3340 3341 3342 3343 3344 3345 3346

  // Unit test the generator itself for schema stability
  std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen3 =
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
  const char data[] = "here is some data";
  checksum_crc32c_gen3->Update(data, sizeof(data));
  checksum_crc32c_gen3->Finalize();
  checksum = checksum_crc32c_gen3->GetChecksum();
  ASSERT_STREQ(checksum.c_str(), "\345\245\277\110");
3347 3348
}

3349 3350
// Plain table is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
I
Igor Sugak 已提交
3351
TEST_F(PlainTableTest, BasicPlainTableProperties) {
S
Stanislau Hlebik 已提交
3352 3353 3354 3355 3356 3357
  PlainTableOptions plain_table_options;
  plain_table_options.user_key_len = 8;
  plain_table_options.bloom_bits_per_key = 8;
  plain_table_options.hash_table_ratio = 0;

  PlainTableFactory factory(plain_table_options);
A
Andres Notzli 已提交
3358
  test::StringSink sink;
3359
  std::unique_ptr<WritableFileWriter> file_writer(
3360
      test::GetWritableFileWriter(new test::StringSink(), "" /* don't care */));
3361
  Options options;
L
Lei Jin 已提交
3362
  const ImmutableCFOptions ioptions(options);
3363
  const MutableCFOptions moptions(options);
3364
  InternalKeyComparator ikc(options.comparator);
3365 3366
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
3367
  std::string column_family_name;
3368
  int unknown_level = -1;
3369
  std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
3370 3371 3372 3373
      TableBuilderOptions(
          ioptions, moptions, ikc, &int_tbl_prop_collector_factories,
          kNoCompression, 0 /* sample_for_compression */, CompressionOptions(),
          false /* skip_filters */, column_family_name, unknown_level),
3374
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
3375
      file_writer.get()));
K
Kai Liu 已提交
3376 3377

  for (char c = 'a'; c <= 'z'; ++c) {
3378 3379
    std::string key(8, c);
    key.append("\1       ");  // PlainTable expects internal key structure
K
Kai Liu 已提交
3380 3381 3382 3383
    std::string value(28, c + 42);
    builder->Add(key, value);
  }
  ASSERT_OK(builder->Finish());
3384
  file_writer->Flush();
K
Kai Liu 已提交
3385

A
Andres Notzli 已提交
3386
  test::StringSink* ss =
3387
      ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter(file_writer.get());
3388
  std::unique_ptr<RandomAccessFileReader> file_reader(
3389
      test::GetRandomAccessFileReader(
A
Andres Notzli 已提交
3390
          new test::StringSource(ss->contents(), 72242, true)));
K
Kai Liu 已提交
3391

K
kailiu 已提交
3392
  TableProperties* props = nullptr;
3393
  auto s = ReadTableProperties(file_reader.get(), ss->contents().size(),
3394
                               kPlainTableMagicNumber, ioptions,
3395
                               &props, true /* compression_type_missing */);
K
Kai Liu 已提交
3396
  std::unique_ptr<TableProperties> props_guard(props);
K
Kai Liu 已提交
3397 3398
  ASSERT_OK(s);

K
kailiu 已提交
3399 3400 3401 3402 3403 3404
  ASSERT_EQ(0ul, props->index_size);
  ASSERT_EQ(0ul, props->filter_size);
  ASSERT_EQ(16ul * 26, props->raw_key_size);
  ASSERT_EQ(28ul * 26, props->raw_value_size);
  ASSERT_EQ(26ul, props->num_entries);
  ASSERT_EQ(1ul, props->num_data_blocks);
K
Kai Liu 已提交
3405
}
3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434

TEST_F(PlainTableTest, NoFileChecksum) {
  PlainTableOptions plain_table_options;
  plain_table_options.user_key_len = 20;
  plain_table_options.bloom_bits_per_key = 8;
  plain_table_options.hash_table_ratio = 0;
  PlainTableFactory factory(plain_table_options);

  Options options;
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  std::string column_family_name;
  int unknown_level = -1;
  FileChecksumTestHelper f(true);
  f.CreateWriteableFile();

  std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
      TableBuilderOptions(
          ioptions, moptions, ikc, &int_tbl_prop_collector_factories,
          kNoCompression, 0 /* sample_for_compression */, CompressionOptions(),
          false /* skip_filters */, column_family_name, unknown_level),
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      f.GetFileWriter()));
  f.ResetTableBuilder(std::move(builder));
  f.AddKVtoKVMap(1000);
  f.WriteKVAndFlushTable();
3435 3436
  ASSERT_STREQ(f.GetFileChecksumFuncName(), kUnknownFileChecksumFuncName);
  EXPECT_EQ(f.GetFileChecksum(), kUnknownFileChecksum);
3437 3438
}

3439
TEST_F(PlainTableTest, Crc32cFileChecksum) {
3440 3441 3442 3443 3444 3445
  PlainTableOptions plain_table_options;
  plain_table_options.user_key_len = 20;
  plain_table_options.bloom_bits_per_key = 8;
  plain_table_options.hash_table_ratio = 0;
  PlainTableFactory factory(plain_table_options);

3446 3447
  FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
      new FileChecksumGenCrc32cFactory();
3448
  Options options;
3449
  options.file_checksum_gen_factory.reset(file_checksum_gen_factory);
3450 3451 3452 3453 3454 3455 3456
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  std::string column_family_name;
  int unknown_level = -1;
3457 3458 3459

  FileChecksumGenContext gen_context;
  gen_context.file_name = "db/tmp";
3460
  std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen1 =
3461 3462
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
3463 3464
  FileChecksumTestHelper f(true);
  f.CreateWriteableFile();
3465
  f.SetFileChecksumGenerator(checksum_crc32c_gen1.release());
3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477

  std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
      TableBuilderOptions(
          ioptions, moptions, ikc, &int_tbl_prop_collector_factories,
          kNoCompression, 0 /* sample_for_compression */, CompressionOptions(),
          false /* skip_filters */, column_family_name, unknown_level),
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      f.GetFileWriter()));
  f.ResetTableBuilder(std::move(builder));
  f.AddKVtoKVMap(1000);
  f.WriteKVAndFlushTable();
  ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");
3478

3479
  std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen2 =
3480 3481
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
3482
  std::string checksum;
3483
  ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum));
3484 3485 3486
  EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str());
}

3487
#endif  // !ROCKSDB_LITE
K
Kai Liu 已提交
3488

I
Igor Sugak 已提交
3489
TEST_F(GeneralTableTest, ApproximateOffsetOfPlain) {
3490
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
J
jorlow@chromium.org 已提交
3491 3492 3493 3494 3495 3496 3497 3498
  c.Add("k01", "hello");
  c.Add("k02", "hello2");
  c.Add("k03", std::string(10000, 'x'));
  c.Add("k04", std::string(200000, 'x'));
  c.Add("k05", std::string(300000, 'x'));
  c.Add("k06", "hello3");
  c.Add("k07", std::string(100000, 'x'));
  std::vector<std::string> keys;
3499
  stl_wrappers::KVMap kvmap;
3500
  Options options;
3501
  test::PlainInternalKeyComparator internal_comparator(options.comparator);
J
jorlow@chromium.org 已提交
3502
  options.compression = kNoCompression;
3503 3504
  BlockBasedTableOptions table_options;
  table_options.block_size = 1024;
L
Lei Jin 已提交
3505
  const ImmutableCFOptions ioptions(options);
3506 3507
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
L
Lei Jin 已提交
3508
           &keys, &kvmap);
J
jorlow@chromium.org 已提交
3509 3510 3511 3512 3513 3514 3515

  ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"),      0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"),   10000,  11000));
3516 3517 3518
  // k04 and k05 will be in two consecutive blocks, the index is
  // an arbitrary slice between k04 and k05, either before or after k04a
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 10000, 211000));
J
jorlow@chromium.org 已提交
3519 3520 3521
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"),  210000, 211000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"),  510000, 511000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"),  510000, 511000));
S
Sanjay Ghemawat 已提交
3522
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"),  610000, 612000));
3523
  c.ResetTableReader();
J
jorlow@chromium.org 已提交
3524 3525
}

K
Kai Liu 已提交
3526
static void DoCompressionTest(CompressionType comp) {
J
jorlow@chromium.org 已提交
3527
  Random rnd(301);
3528
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
J
jorlow@chromium.org 已提交
3529 3530 3531 3532 3533 3534
  std::string tmp;
  c.Add("k01", "hello");
  c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
  c.Add("k03", "hello3");
  c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
  std::vector<std::string> keys;
3535
  stl_wrappers::KVMap kvmap;
3536
  Options options;
3537
  test::PlainInternalKeyComparator ikc(options.comparator);
H
heyongqiang 已提交
3538
  options.compression = comp;
3539 3540
  BlockBasedTableOptions table_options;
  table_options.block_size = 1024;
L
Lei Jin 已提交
3541
  const ImmutableCFOptions ioptions(options);
3542 3543
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap);
J
jorlow@chromium.org 已提交
3544

3545 3546 3547 3548 3549 3550
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3500));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3500));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 7000));
3551
  c.ResetTableReader();
J
jorlow@chromium.org 已提交
3552 3553
}

I
Igor Sugak 已提交
3554
TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) {
K
kailiu 已提交
3555
  std::vector<CompressionType> compression_state;
I
Igor Canadi 已提交
3556
  if (!Snappy_Supported()) {
H
heyongqiang 已提交
3557 3558
    fprintf(stderr, "skipping snappy compression tests\n");
  } else {
K
kailiu 已提交
3559
    compression_state.push_back(kSnappyCompression);
H
heyongqiang 已提交
3560 3561
  }

I
Igor Canadi 已提交
3562
  if (!Zlib_Supported()) {
H
heyongqiang 已提交
3563 3564
    fprintf(stderr, "skipping zlib compression tests\n");
  } else {
K
kailiu 已提交
3565
    compression_state.push_back(kZlibCompression);
H
heyongqiang 已提交
3566 3567
  }

K
kailiu 已提交
3568 3569
  // TODO(kailiu) DoCompressionTest() doesn't work with BZip2.
  /*
I
Igor Canadi 已提交
3570
  if (!BZip2_Supported()) {
A
Albert Strasheim 已提交
3571 3572
    fprintf(stderr, "skipping bzip2 compression tests\n");
  } else {
K
kailiu 已提交
3573
    compression_state.push_back(kBZip2Compression);
A
Albert Strasheim 已提交
3574
  }
K
kailiu 已提交
3575
  */
A
Albert Strasheim 已提交
3576

I
Igor Canadi 已提交
3577 3578
  if (!LZ4_Supported()) {
    fprintf(stderr, "skipping lz4 and lz4hc compression tests\n");
A
Albert Strasheim 已提交
3579
  } else {
K
kailiu 已提交
3580 3581
    compression_state.push_back(kLZ4Compression);
    compression_state.push_back(kLZ4HCCompression);
H
heyongqiang 已提交
3582 3583
  }

3584 3585 3586 3587 3588 3589 3590
  if (!XPRESS_Supported()) {
    fprintf(stderr, "skipping xpress and xpress compression tests\n");
  }
  else {
    compression_state.push_back(kXpressCompression);
  }

K
kailiu 已提交
3591 3592
  for (auto state : compression_state) {
    DoCompressionTest(state);
H
heyongqiang 已提交
3593 3594 3595
  }
}

3596
#ifndef ROCKSDB_VALGRIND_RUN
3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607
TEST_P(ParameterizedHarnessTest, RandomizedHarnessTest) {
  Random rnd(test::RandomSeed() + 5);
  for (int num_entries = 0; num_entries < 2000;
       num_entries += (num_entries < 50 ? 1 : 200)) {
    for (int e = 0; e < num_entries; e++) {
      std::string v;
      Add(test::RandomKey(&rnd, rnd.Skewed(4)),
          test::RandomString(&rnd, rnd.Skewed(5), &v).ToString());
    }
    Test(&rnd);
  }
3608 3609
}

3610
#ifndef ROCKSDB_LITE
3611
TEST_F(DBHarnessTest, RandomizedLongDB) {
3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631
  Random rnd(test::RandomSeed());
  int num_entries = 100000;
  for (int e = 0; e < num_entries; e++) {
    std::string v;
    Add(test::RandomKey(&rnd, rnd.Skewed(4)),
        test::RandomString(&rnd, rnd.Skewed(5), &v).ToString());
  }
  Test(&rnd);

  // We must have created enough data to force merging
  int files = 0;
  for (int level = 0; level < db()->NumberLevels(); level++) {
    std::string value;
    char name[100];
    snprintf(name, sizeof(name), "rocksdb.num-files-at-level%d", level);
    ASSERT_TRUE(db()->GetProperty(name, &value));
    files += atoi(value.c_str());
  }
  ASSERT_GT(files, 0);
}
3632
#endif  // ROCKSDB_LITE
3633
#endif  // ROCKSDB_VALGRIND_RUN
3634

I
Igor Sugak 已提交
3635
class MemTableTest : public testing::Test {};
3636

I
Igor Sugak 已提交
3637
TEST_F(MemTableTest, Simple) {
3638 3639
  InternalKeyComparator cmp(BytewiseComparator());
  auto table_factory = std::make_shared<SkipListFactory>();
I
Igor Canadi 已提交
3640 3641
  Options options;
  options.memtable_factory = table_factory;
3642
  ImmutableCFOptions ioptions(options);
3643
  WriteBufferManager wb(options.db_write_buffer_size);
3644 3645 3646
  MemTable* memtable =
      new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
                   kMaxSequenceNumber, 0 /* column_family_id */);
3647 3648 3649 3650 3651 3652 3653
  memtable->Ref();
  WriteBatch batch;
  WriteBatchInternal::SetSequence(&batch, 100);
  batch.Put(std::string("k1"), std::string("v1"));
  batch.Put(std::string("k2"), std::string("v2"));
  batch.Put(std::string("k3"), std::string("v3"));
  batch.Put(std::string("largekey"), std::string("vlarge"));
3654 3655
  batch.DeleteRange(std::string("chi"), std::string("xigua"));
  batch.DeleteRange(std::string("begin"), std::string("end"));
3656
  ColumnFamilyMemTablesDefault cf_mems_default(memtable);
3657
  ASSERT_TRUE(
3658 3659
      WriteBatchInternal::InsertInto(&batch, &cf_mems_default, nullptr, nullptr)
          .ok());
3660

3661 3662
  for (int i = 0; i < 2; ++i) {
    Arena arena;
3663 3664 3665 3666 3667 3668 3669
    ScopedArenaIterator arena_iter_guard;
    std::unique_ptr<InternalIterator> iter_guard;
    InternalIterator* iter;
    if (i == 0) {
      iter = memtable->NewIterator(ReadOptions(), &arena);
      arena_iter_guard.set(iter);
    } else {
3670 3671
      iter = memtable->NewRangeTombstoneIterator(
          ReadOptions(), kMaxSequenceNumber /* read_seq */);
3672 3673
      iter_guard.reset(iter);
    }
A
Andrew Kryczka 已提交
3674 3675 3676
    if (iter == nullptr) {
      continue;
    }
3677 3678 3679 3680 3681 3682
    iter->SeekToFirst();
    while (iter->Valid()) {
      fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(),
              iter->value().ToString().c_str());
      iter->Next();
    }
3683 3684
  }

3685
  delete memtable->Unref();
3686 3687
}

3688
// Test the empty key
3689 3690 3691 3692
TEST_P(ParameterizedHarnessTest, SimpleEmptyKey) {
  Random rnd(test::RandomSeed() + 1);
  Add("", "v");
  Test(&rnd);
3693 3694
}

3695 3696 3697 3698
TEST_P(ParameterizedHarnessTest, SimpleSingle) {
  Random rnd(test::RandomSeed() + 2);
  Add("abc", "v");
  Test(&rnd);
3699 3700
}

3701 3702 3703 3704 3705 3706
TEST_P(ParameterizedHarnessTest, SimpleMulti) {
  Random rnd(test::RandomSeed() + 3);
  Add("abc", "v");
  Add("abcd", "v");
  Add("ac", "v2");
  Test(&rnd);
3707 3708
}

3709 3710 3711 3712
TEST_P(ParameterizedHarnessTest, SimpleSpecialKey) {
  Random rnd(test::RandomSeed() + 4);
  Add("\xff\xff", "v3");
  Test(&rnd);
3713
}
3714

3715
TEST(TableTest, FooterTests) {
I
xxHash  
Igor Canadi 已提交
3716 3717 3718
  {
    // upconvert legacy block based
    std::string encoded;
3719
    Footer footer(kLegacyBlockBasedTableMagicNumber, 0);
I
xxHash  
Igor Canadi 已提交
3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732
    BlockHandle meta_index(10, 5), index(20, 15);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    decoded_footer.DecodeFrom(&encoded_slice);
    ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
    ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3733
    ASSERT_EQ(decoded_footer.version(), 0U);
I
xxHash  
Igor Canadi 已提交
3734 3735 3736 3737
  }
  {
    // xxhash block based
    std::string encoded;
3738
    Footer footer(kBlockBasedTableMagicNumber, 1);
I
xxHash  
Igor Canadi 已提交
3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752
    BlockHandle meta_index(10, 5), index(20, 15);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    footer.set_checksum(kxxHash);
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    decoded_footer.DecodeFrom(&encoded_slice);
    ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
    ASSERT_EQ(decoded_footer.checksum(), kxxHash);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3753
    ASSERT_EQ(decoded_footer.version(), 1U);
I
xxHash  
Igor Canadi 已提交
3754
  }
B
Bo Hou 已提交
3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774
  {
    // xxhash64 block based
    std::string encoded;
    Footer footer(kBlockBasedTableMagicNumber, 1);
    BlockHandle meta_index(10, 5), index(20, 15);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    footer.set_checksum(kxxHash64);
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    decoded_footer.DecodeFrom(&encoded_slice);
    ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
    ASSERT_EQ(decoded_footer.checksum(), kxxHash64);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
    ASSERT_EQ(decoded_footer.version(), 1U);
  }
3775 3776
// Plain table is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
I
xxHash  
Igor Canadi 已提交
3777 3778 3779
  {
    // upconvert legacy plain table
    std::string encoded;
3780
    Footer footer(kLegacyPlainTableMagicNumber, 0);
I
xxHash  
Igor Canadi 已提交
3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793
    BlockHandle meta_index(10, 5), index(20, 15);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    decoded_footer.DecodeFrom(&encoded_slice);
    ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
    ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3794
    ASSERT_EQ(decoded_footer.version(), 0U);
I
xxHash  
Igor Canadi 已提交
3795 3796 3797 3798
  }
  {
    // xxhash block based
    std::string encoded;
3799
    Footer footer(kPlainTableMagicNumber, 1);
I
xxHash  
Igor Canadi 已提交
3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813
    BlockHandle meta_index(10, 5), index(20, 15);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    footer.set_checksum(kxxHash);
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    decoded_footer.DecodeFrom(&encoded_slice);
    ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
    ASSERT_EQ(decoded_footer.checksum(), kxxHash);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3814 3815
    ASSERT_EQ(decoded_footer.version(), 1U);
  }
3816
#endif  // !ROCKSDB_LITE
3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834
  {
    // version == 2
    std::string encoded;
    Footer footer(kBlockBasedTableMagicNumber, 2);
    BlockHandle meta_index(10, 5), index(20, 15);
    footer.set_metaindex_handle(meta_index);
    footer.set_index_handle(index);
    footer.EncodeTo(&encoded);
    Footer decoded_footer;
    Slice encoded_slice(encoded);
    decoded_footer.DecodeFrom(&encoded_slice);
    ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
    ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
    ASSERT_EQ(decoded_footer.version(), 2U);
I
xxHash  
Igor Canadi 已提交
3835 3836 3837
  }
}

3838
class IndexBlockRestartIntervalTest
3839
    : public TableTest,
3840
      public ::testing::WithParamInterface<std::pair<int, bool>> {
3841
 public:
3842 3843 3844 3845 3846
  static std::vector<std::pair<int, bool>> GetRestartValues() {
    return {{-1, false}, {0, false},  {1, false}, {8, false},
            {16, false}, {32, false}, {-1, true}, {0, true},
            {1, true},   {8, true},   {16, true}, {32, true}};
  }
3847 3848
};

3849
INSTANTIATE_TEST_CASE_P(
3850 3851 3852 3853 3854 3855 3856 3857
    IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest,
    ::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues()));

TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) {
  const int kKeysInTable = 10000;
  const int kKeySize = 100;
  const int kValSize = 500;

3858 3859
  const int index_block_restart_interval = std::get<0>(GetParam());
  const bool value_delta_encoding = std::get<1>(GetParam());
3860 3861 3862 3863 3864

  Options options;
  BlockBasedTableOptions table_options;
  table_options.block_size = 64;  // small block size to get big index block
  table_options.index_block_restart_interval = index_block_restart_interval;
3865 3866 3867
  if (value_delta_encoding) {
    table_options.format_version = 4;
  }
3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881
  options.table_factory.reset(new BlockBasedTableFactory(table_options));

  TableConstructor c(BytewiseComparator());
  static Random rnd(301);
  for (int i = 0; i < kKeysInTable; i++) {
    InternalKey k(RandomString(&rnd, kKeySize), 0, kTypeValue);
    c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize));
  }

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  const ImmutableCFOptions ioptions(options);
3882 3883 3884
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
3885 3886
  auto reader = c.GetTableReader();

3887 3888 3889
  std::unique_ptr<InternalIterator> db_iter(reader->NewIterator(
      ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908

  // Test point lookup
  for (auto& kv : kvmap) {
    db_iter->Seek(kv.first);

    ASSERT_TRUE(db_iter->Valid());
    ASSERT_OK(db_iter->status());
    ASSERT_EQ(db_iter->key(), kv.first);
    ASSERT_EQ(db_iter->value(), kv.second);
  }

  // Test iterating
  auto kv_iter = kvmap.begin();
  for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
    ASSERT_EQ(db_iter->key(), kv_iter->first);
    ASSERT_EQ(db_iter->value(), kv_iter->second);
    kv_iter++;
  }
  ASSERT_EQ(kv_iter, kvmap.end());
3909
  c.ResetTableReader();
3910 3911
}

3912 3913 3914
class PrefixTest : public testing::Test {
 public:
  PrefixTest() : testing::Test() {}
3915
  ~PrefixTest() override {}
3916 3917 3918 3919
};

namespace {
// A simple PrefixExtractor that only works for test PrefixAndWholeKeyTest
3920
class TestPrefixExtractor : public ROCKSDB_NAMESPACE::SliceTransform {
3921 3922 3923 3924
 public:
  ~TestPrefixExtractor() override{};
  const char* Name() const override { return "TestPrefixExtractor"; }

3925 3926
  ROCKSDB_NAMESPACE::Slice Transform(
      const ROCKSDB_NAMESPACE::Slice& src) const override {
3927
    assert(IsValid(src));
3928
    return ROCKSDB_NAMESPACE::Slice(src.data(), 3);
3929 3930
  }

3931
  bool InDomain(const ROCKSDB_NAMESPACE::Slice& src) const override {
3932 3933 3934 3935
    assert(IsValid(src));
    return true;
  }

3936 3937 3938
  bool InRange(const ROCKSDB_NAMESPACE::Slice& /*dst*/) const override {
    return true;
  }
3939

3940
  bool IsValid(const ROCKSDB_NAMESPACE::Slice& src) const {
3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961
    if (src.size() != 4) {
      return false;
    }
    if (src[0] != '[') {
      return false;
    }
    if (src[1] < '0' || src[1] > '9') {
      return false;
    }
    if (src[2] != ']') {
      return false;
    }
    if (src[3] < '0' || src[3] > '9') {
      return false;
    }
    return true;
  }
};
}  // namespace

TEST_F(PrefixTest, PrefixAndWholeKeyTest) {
3962 3963
  ROCKSDB_NAMESPACE::Options options;
  options.compaction_style = ROCKSDB_NAMESPACE::kCompactionStyleUniversal;
3964 3965 3966 3967 3968
  options.num_levels = 20;
  options.create_if_missing = true;
  options.optimize_filters_for_hits = false;
  options.target_file_size_base = 268435456;
  options.prefix_extractor = std::make_shared<TestPrefixExtractor>();
3969 3970
  ROCKSDB_NAMESPACE::BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
3971 3972 3973
  bbto.block_size = 262144;
  bbto.whole_key_filtering = true;

3974
  const std::string kDBPath = test::PerThreadDBPath("table_prefix_test");
3975 3976
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyDB(kDBPath, options);
3977 3978
  ROCKSDB_NAMESPACE::DB* db;
  ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
3979 3980 3981 3982 3983 3984

  // Create a bunch of keys with 10 filters.
  for (int i = 0; i < 10; i++) {
    std::string prefix = "[" + std::to_string(i) + "]";
    for (int j = 0; j < 10; j++) {
      std::string key = prefix + std::to_string(j);
3985
      db->Put(ROCKSDB_NAMESPACE::WriteOptions(), key, "1");
3986 3987 3988 3989 3990 3991 3992 3993 3994 3995
    }
  }

  // Trigger compaction.
  db->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  delete db;
  // In the second round, turn whole_key_filtering off and expect
  // rocksdb still works.
}

3996 3997 3998 3999 4000 4001 4002 4003
/*
 * Disable TableWithGlobalSeqno since RocksDB does not store global_seqno in
 * the SST file any more. Instead, RocksDB deduces global_seqno from the
 * MANIFEST while reading from an SST. Therefore, it's not possible to test the
 * functionality of global_seqno in a single, isolated unit test without the
 * involvement of Version, VersionSet, etc.
 */
TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) {
4004
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
4005
  test::StringSink* sink = new test::StringSink();
4006
  std::unique_ptr<WritableFileWriter> file_writer(
4007
      test::GetWritableFileWriter(sink, "" /* don't care */));
4008 4009 4010
  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  const ImmutableCFOptions ioptions(options);
4011
  const MutableCFOptions moptions(options);
4012 4013 4014 4015 4016 4017 4018 4019
  InternalKeyComparator ikc(options.comparator);
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  int_tbl_prop_collector_factories.emplace_back(
      new SstFileWriterPropertiesCollectorFactory(2 /* version */,
                                                  0 /* global_seqno*/));
  std::string column_family_name;
  std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
4020 4021
      TableBuilderOptions(ioptions, moptions, ikc,
                          &int_tbl_prop_collector_factories, kNoCompression,
4022 4023
                          0 /* sample_for_compression */, CompressionOptions(),
                          false /* skip_filters */, column_family_name, -1),
4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      file_writer.get()));

  for (char c = 'a'; c <= 'z'; ++c) {
    std::string key(8, c);
    std::string value = key;
    InternalKey ik(key, 0, kTypeValue);

    builder->Add(ik.Encode(), value);
  }
  ASSERT_OK(builder->Finish());
  file_writer->Flush();

  test::RandomRWStringSink ss_rw(sink);
  uint32_t version;
  uint64_t global_seqno;
  uint64_t global_seqno_offset;

  // Helper function to get version, global_seqno, global_seqno_offset
  std::function<void()> GetVersionAndGlobalSeqno = [&]() {
4044
    std::unique_ptr<RandomAccessFileReader> file_reader(
4045 4046 4047 4048 4049 4050
        test::GetRandomAccessFileReader(
            new test::StringSource(ss_rw.contents(), 73342, true)));

    TableProperties* props = nullptr;
    ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
                                  kBlockBasedTableMagicNumber, ioptions,
4051
                                  &props, true /* compression_type_missing */));
4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072

    UserCollectedProperties user_props = props->user_collected_properties;
    version = DecodeFixed32(
        user_props[ExternalSstFilePropertyNames::kVersion].c_str());
    global_seqno = DecodeFixed64(
        user_props[ExternalSstFilePropertyNames::kGlobalSeqno].c_str());
    global_seqno_offset =
        props->properties_offsets[ExternalSstFilePropertyNames::kGlobalSeqno];

    delete props;
  };

  // Helper function to update the value of the global seqno in the file
  std::function<void(uint64_t)> SetGlobalSeqno = [&](uint64_t val) {
    std::string new_global_seqno;
    PutFixed64(&new_global_seqno, val);

    ASSERT_OK(ss_rw.Write(global_seqno_offset, new_global_seqno));
  };

  // Helper function to get the contents of the table InternalIterator
4073
  std::unique_ptr<TableReader> table_reader;
4074
  std::function<InternalIterator*()> GetTableInternalIter = [&]() {
4075
    std::unique_ptr<RandomAccessFileReader> file_reader(
4076 4077 4078 4079
        test::GetRandomAccessFileReader(
            new test::StringSource(ss_rw.contents(), 73342, true)));

    options.table_factory->NewTableReader(
4080 4081 4082
        TableReaderOptions(ioptions, moptions.prefix_extractor.get(),
                           EnvOptions(), ikc),
        std::move(file_reader), ss_rw.contents().size(), &table_reader);
4083

4084 4085 4086
    return table_reader->NewIterator(
        ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized);
4087 4088 4089
  };

  GetVersionAndGlobalSeqno();
4090 4091
  ASSERT_EQ(2u, version);
  ASSERT_EQ(0u, global_seqno);
4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110

  InternalIterator* iter = GetTableInternalIter();
  char current_c = 'a';
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ParsedInternalKey pik;
    ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 0);
    ASSERT_EQ(pik.user_key, iter->value());
    ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
    current_c++;
  }
  ASSERT_EQ(current_c, 'z' + 1);
  delete iter;

  // Update global sequence number to 10
  SetGlobalSeqno(10);
  GetVersionAndGlobalSeqno();
4111 4112
  ASSERT_EQ(2u, version);
  ASSERT_EQ(10u, global_seqno);
4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147

  iter = GetTableInternalIter();
  current_c = 'a';
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ParsedInternalKey pik;
    ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 10);
    ASSERT_EQ(pik.user_key, iter->value());
    ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
    current_c++;
  }
  ASSERT_EQ(current_c, 'z' + 1);

  // Verify Seek
  for (char c = 'a'; c <= 'z'; c++) {
    std::string k = std::string(8, c);
    InternalKey ik(k, 10, kValueTypeForSeek);
    iter->Seek(ik.Encode());
    ASSERT_TRUE(iter->Valid());

    ParsedInternalKey pik;
    ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 10);
    ASSERT_EQ(pik.user_key.ToString(), k);
    ASSERT_EQ(iter->value().ToString(), k);
  }
  delete iter;

  // Update global sequence number to 3
  SetGlobalSeqno(3);
  GetVersionAndGlobalSeqno();
4148 4149
  ASSERT_EQ(2u, version);
  ASSERT_EQ(3u, global_seqno);
4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184

  iter = GetTableInternalIter();
  current_c = 'a';
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ParsedInternalKey pik;
    ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 3);
    ASSERT_EQ(pik.user_key, iter->value());
    ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
    current_c++;
  }
  ASSERT_EQ(current_c, 'z' + 1);

  // Verify Seek
  for (char c = 'a'; c <= 'z'; c++) {
    std::string k = std::string(8, c);
    // seqno=4 is less than 3 so we still should get our key
    InternalKey ik(k, 4, kValueTypeForSeek);
    iter->Seek(ik.Encode());
    ASSERT_TRUE(iter->Valid());

    ParsedInternalKey pik;
    ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));

    ASSERT_EQ(pik.type, ValueType::kTypeValue);
    ASSERT_EQ(pik.sequence, 3);
    ASSERT_EQ(pik.user_key.ToString(), k);
    ASSERT_EQ(iter->value().ToString(), k);
  }

  delete iter;
}

4185 4186
TEST_P(BlockBasedTableTest, BlockAlignTest) {
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
4187 4188
  bbto.block_align = true;
  test::StringSink* sink = new test::StringSink();
4189
  std::unique_ptr<WritableFileWriter> file_writer(
4190
      test::GetWritableFileWriter(sink, "" /* don't care */));
4191 4192 4193 4194
  Options options;
  options.compression = kNoCompression;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  const ImmutableCFOptions ioptions(options);
4195
  const MutableCFOptions moptions(options);
4196 4197 4198 4199 4200
  InternalKeyComparator ikc(options.comparator);
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  std::string column_family_name;
  std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
4201 4202
      TableBuilderOptions(ioptions, moptions, ikc,
                          &int_tbl_prop_collector_factories, kNoCompression,
4203 4204
                          0 /* sample_for_compression */, CompressionOptions(),
                          false /* skip_filters */, column_family_name, -1),
4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      file_writer.get()));

  for (int i = 1; i <= 10000; ++i) {
    std::ostringstream ostr;
    ostr << std::setfill('0') << std::setw(5) << i;
    std::string key = ostr.str();
    std::string value = "val";
    InternalKey ik(key, 0, kTypeValue);

    builder->Add(ik.Encode(), value);
  }
  ASSERT_OK(builder->Finish());
  file_writer->Flush();

  test::RandomRWStringSink ss_rw(sink);
4221
  std::unique_ptr<RandomAccessFileReader> file_reader(
4222 4223 4224 4225 4226 4227 4228 4229
      test::GetRandomAccessFileReader(
          new test::StringSource(ss_rw.contents(), 73342, true)));

  // Helper function to get version, global_seqno, global_seqno_offset
  std::function<void()> VerifyBlockAlignment = [&]() {
    TableProperties* props = nullptr;
    ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
                                  kBlockBasedTableMagicNumber, ioptions,
4230
                                  &props, true /* compression_type_missing */));
4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247

    uint64_t data_block_size = props->data_size / props->num_data_blocks;
    ASSERT_EQ(data_block_size, 4096);
    ASSERT_EQ(props->data_size, data_block_size * props->num_data_blocks);
    delete props;
  };

  VerifyBlockAlignment();

  // The below block of code verifies that we can read back the keys. Set
  // block_align to false when creating the reader to ensure we can flip between
  // the two modes without any issues
  std::unique_ptr<TableReader> table_reader;
  bbto.block_align = false;
  Options options2;
  options2.table_factory.reset(NewBlockBasedTableFactory(bbto));
  ImmutableCFOptions ioptions2(options2);
4248 4249
  const MutableCFOptions moptions2(options2);

4250
  ASSERT_OK(ioptions.table_factory->NewTableReader(
4251 4252
      TableReaderOptions(ioptions2, moptions2.prefix_extractor.get(),
                         EnvOptions(),
4253 4254 4255
                         GetPlainInternalComparator(options2.comparator)),
      std::move(file_reader), ss_rw.contents().size(), &table_reader));

4256
  std::unique_ptr<InternalIterator> db_iter(table_reader->NewIterator(
4257 4258
      ReadOptions(), moptions2.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275

  int expected_key = 1;
  for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
    std::ostringstream ostr;
    ostr << std::setfill('0') << std::setw(5) << expected_key++;
    std::string key = ostr.str();
    std::string value = "val";

    ASSERT_OK(db_iter->status());
    ASSERT_EQ(ExtractUserKey(db_iter->key()).ToString(), key);
    ASSERT_EQ(db_iter->value().ToString(), value);
  }
  expected_key--;
  ASSERT_EQ(expected_key, 10000);
  table_reader.reset();
}

4276 4277 4278 4279
TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
  bbto.block_align = true;
  test::StringSink* sink = new test::StringSink();
4280
  std::unique_ptr<WritableFileWriter> file_writer(
4281
      test::GetWritableFileWriter(sink, "" /* don't care */));
4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296

  Options options;
  options.compression = kNoCompression;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));

  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  std::string column_family_name;

  std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, ikc,
                          &int_tbl_prop_collector_factories, kNoCompression,
4297 4298
                          0 /* sample_for_compression */, CompressionOptions(),
                          false /* skip_filters */, column_family_name, -1),
4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      file_writer.get()));

  for (int i = 1; i <= 10000; ++i) {
    std::ostringstream ostr;
    ostr << std::setfill('0') << std::setw(5) << i;
    std::string key = ostr.str();
    std::string value = "val";
    InternalKey ik(key, 0, kTypeValue);

    builder->Add(ik.Encode(), value);
  }
  ASSERT_OK(builder->Finish());
  file_writer->Flush();

  test::RandomRWStringSink ss_rw(sink);
4315
  std::unique_ptr<RandomAccessFileReader> file_reader(
4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326
      test::GetRandomAccessFileReader(
          new test::StringSource(ss_rw.contents(), 73342, true)));

  {
    RandomAccessFileReader* file = file_reader.get();
    uint64_t file_size = ss_rw.contents().size();

    Footer footer;
    ASSERT_OK(ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
                                 &footer, kBlockBasedTableMagicNumber));

4327
    auto BlockFetchHelper = [&](const BlockHandle& handle, BlockType block_type,
4328 4329 4330 4331 4332
                                BlockContents* contents) {
      ReadOptions read_options;
      read_options.verify_checksums = false;
      PersistentCacheOptions cache_options;

4333 4334 4335
      BlockFetcher block_fetcher(
          file, nullptr /* prefetch_buffer */, footer, read_options, handle,
          contents, ioptions, false /* decompress */,
4336 4337
          false /*maybe_compressed*/, block_type,
          UncompressionDict::GetEmptyDict(), cache_options);
4338 4339 4340 4341 4342 4343 4344 4345

      ASSERT_OK(block_fetcher.ReadBlockContents());
    };

    // -- Read metaindex block
    auto metaindex_handle = footer.metaindex_handle();
    BlockContents metaindex_contents;

4346
    BlockFetchHelper(metaindex_handle, BlockType::kMetaIndex,
4347
                     &metaindex_contents);
4348
    Block metaindex_block(std::move(metaindex_contents));
4349

4350
    std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
4351 4352
        BytewiseComparator(), BytewiseComparator(),
        kDisableGlobalSequenceNumber));
4353 4354 4355 4356 4357 4358 4359 4360 4361 4362
    bool found_properties_block = true;
    ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block));
    ASSERT_TRUE(found_properties_block);

    // -- Read properties block
    Slice v = meta_iter->value();
    BlockHandle properties_handle;
    ASSERT_OK(properties_handle.DecodeFrom(&v));
    BlockContents properties_contents;

4363
    BlockFetchHelper(properties_handle, BlockType::kProperties,
4364
                     &properties_contents);
4365
    Block properties_block(std::move(properties_contents));
4366

4367
    ASSERT_EQ(properties_block.NumRestarts(), 1u);
4368 4369 4370
  }
}

4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421
TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
  // The properties meta-block should come at the end since we always need to
  // read it when opening a file, unlike index/filter/other meta-blocks, which
  // are sometimes read depending on the user's configuration. This ordering
  // allows us to do a small readahead on the end of the file to read properties
  // and meta-index blocks with one I/O.
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  c.Add("a1", "val1");
  c.Add("b2", "val2");
  c.Add("c3", "val3");
  c.Add("d4", "val4");
  c.Add("e5", "val5");
  c.Add("f6", "val6");
  c.Add("g7", "val7");
  c.Add("h8", "val8");
  c.Add("j9", "val9");

  // write an SST file
  Options options;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.filter_policy.reset(NewBloomFilterPolicy(
      8 /* bits_per_key */, false /* use_block_based_filter */));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  ImmutableCFOptions ioptions(options);
  MutableCFOptions moptions(options);
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);

  // get file reader
  test::StringSink* table_sink = c.TEST_GetSink();
  std::unique_ptr<RandomAccessFileReader> table_reader{
      test::GetRandomAccessFileReader(
          new test::StringSource(table_sink->contents(), 0 /* unique_id */,
                                 false /* allow_mmap_reads */))};
  size_t table_size = table_sink->contents().size();

  // read footer
  Footer footer;
  ASSERT_OK(ReadFooterFromFile(table_reader.get(),
                               nullptr /* prefetch_buffer */, table_size,
                               &footer, kBlockBasedTableMagicNumber));

  // read metaindex
  auto metaindex_handle = footer.metaindex_handle();
  BlockContents metaindex_contents;
  PersistentCacheOptions pcache_opts;
  BlockFetcher block_fetcher(
      table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(),
      metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
4422 4423 4424
      false /*maybe_compressed*/, BlockType::kMetaIndex,
      UncompressionDict::GetEmptyDict(), pcache_opts,
      nullptr /*memory_allocator*/);
4425
  ASSERT_OK(block_fetcher.ReadBlockContents());
4426
  Block metaindex_block(std::move(metaindex_contents));
4427 4428 4429

  // verify properties block comes last
  std::unique_ptr<InternalIterator> metaindex_iter{
4430 4431
      metaindex_block.NewDataIterator(options.comparator, options.comparator,
                                      kDisableGlobalSequenceNumber)};
4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450
  uint64_t max_offset = 0;
  std::string key_at_max_offset;
  for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid();
       metaindex_iter->Next()) {
    BlockHandle handle;
    Slice value = metaindex_iter->value();
    ASSERT_OK(handle.DecodeFrom(&value));
    if (handle.offset() > max_offset) {
      max_offset = handle.offset();
      key_at_max_offset = metaindex_iter->key().ToString();
    }
  }
  ASSERT_EQ(kPropertiesBlock, key_at_max_offset);
  // index handle is stored in footer rather than metaindex block, so need
  // separate logic to verify it comes before properties block.
  ASSERT_GT(max_offset, footer.index_handle().offset());
  c.ResetTableReader();
}

4451
TEST_P(BlockBasedTableTest, BadOptions) {
4452
  ROCKSDB_NAMESPACE::Options options;
4453
  options.compression = kNoCompression;
4454
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
4455 4456 4457
  bbto.block_size = 4000;
  bbto.block_align = true;

4458
  const std::string kDBPath =
4459
      test::PerThreadDBPath("block_based_table_bad_options_test");
4460 4461
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyDB(kDBPath, options);
4462 4463
  ROCKSDB_NAMESPACE::DB* db;
  ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
4464 4465 4466 4467

  bbto.block_size = 4096;
  options.compression = kSnappyCompression;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
4468
  ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
4469 4470
}

4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513
TEST_F(BBTTailPrefetchTest, TestTailPrefetchStats) {
  TailPrefetchStats tpstats;
  ASSERT_EQ(0, tpstats.GetSuggestedPrefetchSize());
  tpstats.RecordEffectiveSize(size_t{1000});
  tpstats.RecordEffectiveSize(size_t{1005});
  tpstats.RecordEffectiveSize(size_t{1002});
  ASSERT_EQ(1005, tpstats.GetSuggestedPrefetchSize());

  // One single super large value shouldn't influence much
  tpstats.RecordEffectiveSize(size_t{1002000});
  tpstats.RecordEffectiveSize(size_t{999});
  ASSERT_LE(1005, tpstats.GetSuggestedPrefetchSize());
  ASSERT_GT(1200, tpstats.GetSuggestedPrefetchSize());

  // Only history of 32 is kept
  for (int i = 0; i < 32; i++) {
    tpstats.RecordEffectiveSize(size_t{100});
  }
  ASSERT_EQ(100, tpstats.GetSuggestedPrefetchSize());

  // 16 large values and 16 small values. The result should be closer
  // to the small value as the algorithm.
  for (int i = 0; i < 16; i++) {
    tpstats.RecordEffectiveSize(size_t{1000});
  }
  tpstats.RecordEffectiveSize(size_t{10});
  tpstats.RecordEffectiveSize(size_t{20});
  for (int i = 0; i < 6; i++) {
    tpstats.RecordEffectiveSize(size_t{100});
  }
  ASSERT_LE(80, tpstats.GetSuggestedPrefetchSize());
  ASSERT_GT(200, tpstats.GetSuggestedPrefetchSize());
}

TEST_F(BBTTailPrefetchTest, FilePrefetchBufferMinOffset) {
  TailPrefetchStats tpstats;
  FilePrefetchBuffer buffer(nullptr, 0, 0, false, true);
  buffer.TryReadFromCache(500, 10, nullptr);
  buffer.TryReadFromCache(480, 10, nullptr);
  buffer.TryReadFromCache(490, 10, nullptr);
  ASSERT_EQ(480, buffer.min_offset_read());
}

4514 4515 4516 4517 4518 4519 4520
TEST_P(BlockBasedTableTest, DataBlockHashIndex) {
  const int kNumKeys = 500;
  const int kKeySize = 8;
  const int kValSize = 40;

  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.data_block_index_type =
4521
      BlockBasedTableOptions::kDataBlockBinaryAndHash;
4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548

  Options options;
  options.comparator = BytewiseComparator();

  options.table_factory.reset(new BlockBasedTableFactory(table_options));

  TableConstructor c(options.comparator);

  static Random rnd(1048);
  for (int i = 0; i < kNumKeys; i++) {
    // padding one "0" to mark existent keys.
    std::string random_key(RandomString(&rnd, kKeySize - 1) + "1");
    InternalKey k(random_key, 0, kTypeValue);
    c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize));
  }

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  const InternalKeyComparator internal_comparator(options.comparator);
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);

  auto reader = c.GetTableReader();

  std::unique_ptr<InternalIterator> seek_iter;
4549 4550 4551
  seek_iter.reset(reader->NewIterator(
      ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574
  for (int i = 0; i < 2; ++i) {
    ReadOptions ro;
    // for every kv, we seek using two method: Get() and Seek()
    // Get() will use the SuffixIndexHash in Block. For non-existent key it
    //      will invalidate the iterator
    // Seek() will use the default BinarySeek() in Block. So for non-existent
    //      key it will land at the closest key that is large than target.

    // Search for existent keys
    for (auto& kv : kvmap) {
      if (i == 0) {
        // Search using Seek()
        seek_iter->Seek(kv.first);
        ASSERT_OK(seek_iter->status());
        ASSERT_TRUE(seek_iter->Valid());
        ASSERT_EQ(seek_iter->key(), kv.first);
        ASSERT_EQ(seek_iter->value(), kv.second);
      } else {
        // Search using Get()
        PinnableSlice value;
        std::string user_key = ExtractUserKey(kv.first).ToString();
        GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                               GetContext::kNotFound, user_key, &value, nullptr,
4575
                               nullptr, true, nullptr, nullptr);
4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586
        ASSERT_OK(reader->Get(ro, kv.first, &get_context,
                              moptions.prefix_extractor.get()));
        ASSERT_EQ(get_context.State(), GetContext::kFound);
        ASSERT_EQ(value, Slice(kv.second));
        value.Reset();
      }
    }

    // Search for non-existent keys
    for (auto& kv : kvmap) {
      std::string user_key = ExtractUserKey(kv.first).ToString();
4587
      user_key.back() = '0';  // make it non-existent key
4588 4589
      InternalKey internal_key(user_key, 0, kTypeValue);
      std::string encoded_key = internal_key.Encode().ToString();
4590
      if (i == 0) {  // Search using Seek()
4591 4592
        seek_iter->Seek(encoded_key);
        ASSERT_OK(seek_iter->status());
4593
        if (seek_iter->Valid()) {
4594 4595 4596
          ASSERT_TRUE(BytewiseComparator()->Compare(
                          user_key, ExtractUserKey(seek_iter->key())) < 0);
        }
4597
      } else {  // Search using Get()
4598 4599 4600
        PinnableSlice value;
        GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                               GetContext::kNotFound, user_key, &value, nullptr,
4601
                               nullptr, true, nullptr, nullptr);
4602 4603 4604 4605 4606 4607 4608 4609 4610
        ASSERT_OK(reader->Get(ro, encoded_key, &get_context,
                              moptions.prefix_extractor.get()));
        ASSERT_EQ(get_context.State(), GetContext::kNotFound);
        value.Reset();
      }
    }
  }
}

4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631
// BlockBasedTableIterator should invalidate itself and return
// OutOfBound()=true immediately after Seek(), to allow LevelIterator
// filter out corresponding level.
TEST_P(BlockBasedTableTest, OutOfBoundOnSeek) {
  TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
  c.Add("foo", "v1");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  BlockBasedTableOptions table_opt(GetBlockBasedTableOptions());
  options.table_factory.reset(NewBlockBasedTableFactory(table_opt));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_opt,
           GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
  auto* reader = c.GetTableReader();
  ReadOptions read_opt;
  std::string upper_bound = "bar";
  Slice upper_bound_slice(upper_bound);
  read_opt.iterate_upper_bound = &upper_bound_slice;
  std::unique_ptr<InternalIterator> iter;
4632 4633 4634
  iter.reset(new KeyConvertingIterator(reader->NewIterator(
      read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
4635 4636 4637
  iter->SeekToFirst();
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->IsOutOfBound());
4638 4639 4640
  iter.reset(new KeyConvertingIterator(reader->NewIterator(
      read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669
  iter->Seek("foo");
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->IsOutOfBound());
}

// BlockBasedTableIterator should invalidate itself and return
// OutOfBound()=true after Next(), if it finds current index key is no smaller
// than upper bound, unless it is pointing to the last data block.
TEST_P(BlockBasedTableTest, OutOfBoundOnNext) {
  TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
  c.Add("bar", "v");
  c.Add("foo", "v");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  BlockBasedTableOptions table_opt(GetBlockBasedTableOptions());
  table_opt.flush_block_policy_factory =
      std::make_shared<FlushBlockEveryKeyPolicyFactory>();
  options.table_factory.reset(NewBlockBasedTableFactory(table_opt));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_opt,
           GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
  auto* reader = c.GetTableReader();
  ReadOptions read_opt;
  std::string ub1 = "bar_after";
  Slice ub_slice1(ub1);
  read_opt.iterate_upper_bound = &ub_slice1;
  std::unique_ptr<InternalIterator> iter;
4670 4671 4672
  iter.reset(new KeyConvertingIterator(reader->NewIterator(
      read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
4673 4674 4675 4676 4677 4678 4679 4680 4681
  iter->Seek("bar");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bar", iter->key());
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->IsOutOfBound());
  std::string ub2 = "foo_after";
  Slice ub_slice2(ub2);
  read_opt.iterate_upper_bound = &ub_slice2;
4682 4683 4684
  iter.reset(new KeyConvertingIterator(reader->NewIterator(
      read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
4685 4686 4687 4688 4689 4690 4691 4692
  iter->Seek("foo");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("foo", iter->key());
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  ASSERT_FALSE(iter->IsOutOfBound());
}

4693
}  // namespace ROCKSDB_NAMESPACE
J
jorlow@chromium.org 已提交
4694 4695

int main(int argc, char** argv) {
I
Igor Sugak 已提交
4696 4697
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
J
jorlow@chromium.org 已提交
4698
}