db_bench.cc 44.4 KB
Newer Older
J
jorlow@chromium.org 已提交
1 2 3 4 5 6 7 8 9
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include "db/db_impl.h"
#include "db/version_set.h"
10
#include "db/db_statistics.h"
11 12 13 14
#include "leveldb/cache.h"
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "leveldb/write_batch.h"
15
#include "leveldb/statistics.h"
J
jorlow@chromium.org 已提交
16 17
#include "port/port.h"
#include "util/crc32c.h"
J
jorlow@chromium.org 已提交
18
#include "util/histogram.h"
19
#include "util/mutexlock.h"
J
jorlow@chromium.org 已提交
20 21
#include "util/random.h"
#include "util/testutil.h"
22
#include "hdfs/env_hdfs.h"
J
jorlow@chromium.org 已提交
23 24 25

// Comma-separated list of operations to run in the specified order
//   Actual benchmarks:
26 27 28 29 30
//      fillseq       -- write N values in sequential key order in async mode
//      fillrandom    -- write N values in random key order in async mode
//      overwrite     -- overwrite N values in random key order in async mode
//      fillsync      -- write N/1000 values in random key order in sync mode
//      fill100K      -- write N/1000 100K values in random order in async mode
S
Sanjay Ghemawat 已提交
31 32
//      deleteseq     -- delete N keys in sequential order
//      deleterandom  -- delete N keys in random order
33 34 35
//      readseq       -- read N times sequentially
//      readreverse   -- read N times in reverse order
//      readrandom    -- read N times in random order
S
Sanjay Ghemawat 已提交
36
//      readmissing   -- read N missing keys in random order
37
//      readhot       -- read N times in random order from 1% section of DB
S
Sanjay Ghemawat 已提交
38
//      seekrandom    -- N random seeks
J
jorlow@chromium.org 已提交
39
//      crc32c        -- repeated crc32c of 4K of data
40
//      acquireload   -- load N*1000 times
J
jorlow@chromium.org 已提交
41 42
//   Meta operations:
//      compact     -- Compact the entire DB
43
//      stats       -- Print DB stats
S
Sanjay Ghemawat 已提交
44
//      sstables    -- Print sstable info
J
jorlow@chromium.org 已提交
45 46
//      heapprofile -- Dump a heap profile (if supported by this port)
static const char* FLAGS_benchmarks =
47
    "fillseq,"
J
jorlow@chromium.org 已提交
48
    "fillsync,"
49 50
    "fillrandom,"
    "overwrite,"
J
jorlow@chromium.org 已提交
51 52
    "readrandom,"
    "readrandom,"  // Extra run to allow previous compactions to quiesce
J
jorlow@chromium.org 已提交
53
    "readseq,"
J
jorlow@chromium.org 已提交
54
    "readreverse,"
J
jorlow@chromium.org 已提交
55
    "compact,"
J
jorlow@chromium.org 已提交
56
    "readrandom,"
J
jorlow@chromium.org 已提交
57
    "readseq,"
J
jorlow@chromium.org 已提交
58
    "readreverse,"
59
    "readrandomwriterandom," // mix reads and writes based on FLAGS_readwritepercent
J
jorlow@chromium.org 已提交
60 61
    "fill100K,"
    "crc32c,"
62 63
    "snappycomp,"
    "snappyuncomp,"
64
    "acquireload,"
J
jorlow@chromium.org 已提交
65
    ;
J
jorlow@chromium.org 已提交
66 67

// Number of key/values to place in database
68
static long FLAGS_num = 1000000;
J
jorlow@chromium.org 已提交
69

70
// Number of read operations to do.  If negative, do FLAGS_num reads.
71
static long FLAGS_reads = -1;
72

73 74 75
// Number of concurrent threads to run.
static int FLAGS_threads = 1;

J
jorlow@chromium.org 已提交
76 77 78 79 80
// Size of each value
static int FLAGS_value_size = 100;

// Arrange to generate values that shrink to this fraction of
// their original size after compression
81
static double FLAGS_compression_ratio = 0.5;
J
jorlow@chromium.org 已提交
82 83 84 85 86

// Print histogram of operation timings
static bool FLAGS_histogram = false;

// Number of bytes to buffer in memtable before compacting
87 88 89 90 91
// (initialized to default value by "main")
static int FLAGS_write_buffer_size = 0;

// Number of bytes to use as a cache of uncompressed data.
// Negative means use default settings.
D
Dhruba Borthakur 已提交
92
static long FLAGS_cache_size = -1;
J
jorlow@chromium.org 已提交
93

94 95 96
// Number of bytes in a block.
static int FLAGS_block_size = 0;

97 98 99
// Maximum number of files to keep open at the same time (use default if == 0)
static int FLAGS_open_files = 0;

S
Sanjay Ghemawat 已提交
100 101 102 103
// Bloom filter bits per key.
// Negative means use default settings.
static int FLAGS_bloom_bits = -1;

104 105 106 107 108
// If true, do not destroy the existing database.  If you set this
// flag and also specify a benchmark that wants a fresh database, that
// benchmark will fail.
static bool FLAGS_use_existing_db = false;

109
// Use the db with the following name.
H
heyongqiang 已提交
110
static const char* FLAGS_db = NULL;
111

112 113 114 115 116
// Number of shards for the block cache is 2 ** FLAGS_cache_numshardbits.
// Negative means use default settings. This is applied only
// if FLAGS_cache_size is non-negative.
static int FLAGS_cache_numshardbits = -1;

117 118 119
// Verify checksum for every block read from storage
static bool FLAGS_verify_checksum = false;

120 121 122 123
// Database statistics
static bool FLAGS_statistics = false;
static class leveldb::DBStatistics* dbstats = NULL;

124 125 126
// Number of write operations to do.  If negative, do FLAGS_num writes.
static long FLAGS_writes = -1;

H
heyongqiang 已提交
127 128
// These default values might change if the hardcoded

129 130 131
// Sync all writes to disk
static bool FLAGS_sync = false;

H
heyongqiang 已提交
132 133 134
// If true, do not wait until data is synced to disk.
static bool FLAGS_disable_data_sync = false;

135 136 137
// If true, issue fsync instead of fdatasync
static bool FLAGS_use_fsync = false;

H
heyongqiang 已提交
138 139 140
// If true, do not write WAL for write.
static bool FLAGS_disable_wal = false;

141
// The total number of levels
142
static unsigned int FLAGS_num_levels = 7;
143

H
heyongqiang 已提交
144 145 146 147 148 149
// Target level-0 file size for compaction
static int FLAGS_target_file_size_base = 2 * 1048576;

// A multiplier to compute target level-N file size
static int FLAGS_target_file_size_multiplier = 1;

150
// Max bytes for level-1
H
heyongqiang 已提交
151 152 153 154 155
static int FLAGS_max_bytes_for_level_base = 10 * 1048576;

// A multiplier to compute max bytes for level-N
static int FLAGS_max_bytes_for_level_multiplier = 10;

H
heyongqiang 已提交
156 157 158
// Number of files in level-0 that will trigger put stop.
static int FLAGS_level0_stop_writes_trigger = 12;

159 160
// Number of files in level-0 that will slow down writes.
static int FLAGS_level0_slowdown_writes_trigger = 8;
H
heyongqiang 已提交
161

M
Mark Callaghan 已提交
162 163 164
// Number of files in level-0 when compactions start
static int FLAGS_level0_file_num_compaction_trigger = 4;

165 166 167 168 169
// Ratio of reads to writes (expressed as a percentage)
// for the ReadRandomWriteRandom workload. The default
// setting is 9 gets for every 1 put.
static int FLAGS_readwritepercent = 90;

170 171 172
// Option to disable compaction triggered by read.
static int FLAGS_disable_seek_compaction = false;

173 174 175 176 177
// Option to delete obsolete files periodically
// Default: 0 which means that obsolete files are
// deleted after every compaction run.
static uint64_t FLAGS_delete_obsolete_files_period_micros = 0;

178 179 180 181
// Algorithm to use to compress the database
static enum leveldb::CompressionType FLAGS_compression_type =
    leveldb::kSnappyCompression;

182 183
// Allows compression for levels 0 and 1 to be disabled when
// other levels are compressed
184
static unsigned int FLAGS_min_level_to_compress = -1;
185

186 187
static int FLAGS_table_cache_numshardbits = 4;

188 189 190
// posix or hdfs environment
static leveldb::Env* FLAGS_env = leveldb::Env::Default();

191 192 193 194
// Stats are reported every N operations when this is greater
// than zero. When 0 the interval grows over time.
static int FLAGS_stats_interval = 0;

195 196 197 198 199 200 201 202 203
// Reports additional stats per interval when this is greater
// than 0.
static int FLAGS_stats_per_interval = 0;

// When not equal to 0 this make threads sleep at each stats
// reporting interval until the compaction score for all levels is
// less than or equal to this value.
static double FLAGS_rate_limit = 0;

H
heyongqiang 已提交
204 205 206
// Run read only benchmarks.
static bool FLAGS_read_only = false;

207
extern bool useOsBuffer;
208
extern bool useFsReadAhead;
209
extern bool useMmapRead;
210
extern bool useMmapWrite;
211

J
jorlow@chromium.org 已提交
212 213
namespace leveldb {

214
// Helper for quickly generating random data.
J
jorlow@chromium.org 已提交
215 216 217
class RandomGenerator {
 private:
  std::string data_;
218
  unsigned int pos_;
J
jorlow@chromium.org 已提交
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243

 public:
  RandomGenerator() {
    // We use a limited amount of data over and over again and ensure
    // that it is larger than the compression window (32KB), and also
    // large enough to serve all typical value sizes we want to write.
    Random rnd(301);
    std::string piece;
    while (data_.size() < 1048576) {
      // Add a short fragment that is as compressible as specified
      // by FLAGS_compression_ratio.
      test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece);
      data_.append(piece);
    }
    pos_ = 0;
  }

  Slice Generate(int len) {
    if (pos_ + len > data_.size()) {
      pos_ = 0;
      assert(len < data_.size());
    }
    pos_ += len;
    return Slice(data_.data() + pos_ - len, len);
  }
244
};
245
// Returns the part of s that remains after dropping leading and trailing
// characters for which isspace() is true.  The result points into the same
// storage as s; nothing is copied.
static Slice TrimSpace(Slice s) {
  // size_t matches what s.size() is compared against and avoids narrowing
  // the length into an unsigned int.
  size_t start = 0;
  // isspace() is only defined for values representable as unsigned char
  // (or EOF), so convert explicitly: a plain char may be negative.
  while (start < s.size() &&
         isspace(static_cast<unsigned char>(s[start]))) {
    start++;
  }
  size_t limit = s.size();
  while (limit > start &&
         isspace(static_cast<unsigned char>(s[limit - 1]))) {
    limit--;
  }
  return Slice(s.data() + start, limit - start);
}

257 258 259 260 261 262 263 264 265 266
// Appends msg to *str, preceded by a single space when *str already holds
// text.  An empty msg leaves *str untouched.
static void AppendWithSpace(std::string* str, Slice msg) {
  if (!msg.empty()) {
    if (!str->empty()) {
      *str += ' ';
    }
    str->append(msg.data(), msg.size());
  }
}

class Stats {
 private:
267
  int id_;
268 269 270
  double start_;
  double finish_;
  double seconds_;
271
  long done_;
272
  long last_report_done_;
273 274 275
  int next_report_;
  int64_t bytes_;
  double last_op_finish_;
276
  double last_report_finish_;
277 278 279 280
  Histogram hist_;
  std::string message_;

 public:
281
  Stats() { Start(-1); }
282

283 284 285
  void Start(int id) {
    id_ = id;
    next_report_ = FLAGS_stats_interval ? FLAGS_stats_interval : 100;
286 287 288
    last_op_finish_ = start_;
    hist_.Clear();
    done_ = 0;
289
    last_report_done_ = 0;
290 291
    bytes_ = 0;
    seconds_ = 0;
292
    start_ = FLAGS_env->NowMicros();
293
    finish_ = start_;
294
    last_report_finish_ = start_;
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
    message_.clear();
  }

  void Merge(const Stats& other) {
    hist_.Merge(other.hist_);
    done_ += other.done_;
    bytes_ += other.bytes_;
    seconds_ += other.seconds_;
    if (other.start_ < start_) start_ = other.start_;
    if (other.finish_ > finish_) finish_ = other.finish_;

    // Just keep the messages from one thread
    if (message_.empty()) message_ = other.message_;
  }

  void Stop() {
311
    finish_ = FLAGS_env->NowMicros();
312 313 314 315 316 317 318
    seconds_ = (finish_ - start_) * 1e-6;
  }

  void AddMessage(Slice msg) {
    AppendWithSpace(&message_, msg);
  }

319 320
  void SetId(int id) { id_ = id; }

M
Mark Callaghan 已提交
321
  void FinishedSingleOp(DB* db) {
322
    if (FLAGS_histogram) {
323
      double now = FLAGS_env->NowMicros();
324 325
      double micros = now - last_op_finish_;
      hist_.Add(micros);
326
      if (micros > 20000 && !FLAGS_stats_interval) {
327 328 329 330 331 332 333 334
        fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
        fflush(stderr);
      }
      last_op_finish_ = now;
    }

    done_++;
    if (done_ >= next_report_) {
335 336 337 338 339 340 341 342 343 344 345 346 347
      if (!FLAGS_stats_interval) {
        if      (next_report_ < 1000)   next_report_ += 100;
        else if (next_report_ < 5000)   next_report_ += 500;
        else if (next_report_ < 10000)  next_report_ += 1000;
        else if (next_report_ < 50000)  next_report_ += 5000;
        else if (next_report_ < 100000) next_report_ += 10000;
        else if (next_report_ < 500000) next_report_ += 50000;
        else                            next_report_ += 100000;
        fprintf(stderr, "... finished %ld ops%30s\r", done_, "");
        fflush(stderr);
      } else {
        double now = FLAGS_env->NowMicros();
        fprintf(stderr,
M
Mark Callaghan 已提交
348
                "... thread %d: (%ld,%ld) ops and (%.1f,%.1f) ops/second in (%.6f,%.6f) seconds\n",
349
                id_,
M
Mark Callaghan 已提交
350
                done_ - last_report_done_, done_,
351
                (done_ - last_report_done_) /
M
Mark Callaghan 已提交
352 353 354 355
                ((now - last_report_finish_) / 1000000.0),
                done_ / ((now - start_) / 1000000.0),
                (now - last_report_finish_) / 1000000.0,
                (now - start_) / 1000000.0);
M
Mark Callaghan 已提交
356

357 358 359 360 361
        if (FLAGS_stats_per_interval) {
          std::string stats;
          if (db && db->GetProperty("leveldb.stats", &stats))
            fprintf(stderr, stats.c_str());
        }
M
Mark Callaghan 已提交
362

363 364 365 366 367
        fflush(stderr);
        next_report_ += FLAGS_stats_interval;
        last_report_finish_ = now;
        last_report_done_ = done_;
      }
368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
    }
  }

  void AddBytes(int64_t n) {
    bytes_ += n;
  }

  void Report(const Slice& name) {
    // Pretend at least one op was done in case we are running a benchmark
    // that does not call FinishedSingleOp().
    if (done_ < 1) done_ = 1;

    std::string extra;
    if (bytes_ > 0) {
      // Rate is computed on actual elapsed time, not the sum of per-thread
      // elapsed times.
      double elapsed = (finish_ - start_) * 1e-6;
      char rate[100];
      snprintf(rate, sizeof(rate), "%6.1f MB/s",
               (bytes_ / 1048576.0) / elapsed);
      extra = rate;
    }
    AppendWithSpace(&extra, message_);
391 392
    double elapsed = (finish_ - start_) * 1e-6;
    double throughput = (double)done_/elapsed;
393

D
Dhruba Borthakur 已提交
394
    fprintf(stdout, "%-12s : %11.3f micros/op %ld ops/sec;%s%s\n",
395 396
            name.ToString().c_str(),
            seconds_ * 1e6 / done_,
D
Dhruba Borthakur 已提交
397
            (long)throughput,
398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
            (extra.empty() ? "" : " "),
            extra.c_str());
    if (FLAGS_histogram) {
      fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
    }
    fflush(stdout);
  }
};

// State shared by all concurrent executions of the same benchmark.
struct SharedState {
  port::Mutex mu;     // Guards the counters and the start flag below
  port::CondVar cv;   // Constructed on mu; signalled on state changes
  int total;          // Number of threads taking part in the benchmark

  // Each thread goes through the following states:
  //    (1) initializing
  //    (2) waiting for others to be initialized
  //    (3) running
  //    (4) done

  long num_initialized;  // Threads that have reached state (2)
  long num_done;         // Threads that have reached state (4)
  bool start;            // Set once all threads may enter state (3)

  // Every field gets a defined value here so that an instance is never
  // observed with indeterminate counters, even before its owner fills it in.
  SharedState()
      : cv(&mu),
        total(0),
        num_initialized(0),
        num_done(0),
        start(false) { }
};

// Per-thread state for concurrent executions of the same benchmark.
struct ThreadState {
  int tid;             // 0..n-1 when running in n threads
  Random rand;         // Has different seeds for different threads
  Stats stats;
431
  SharedState* shared;
432 433 434 435 436 437 438

  ThreadState(int index)
      : tid(index),
        rand(1000 + index) {
  }
};

J
jorlow@chromium.org 已提交
439 440 441
class Benchmark {
 private:
  Cache* cache_;
S
Sanjay Ghemawat 已提交
442
  const FilterPolicy* filter_policy_;
J
jorlow@chromium.org 已提交
443
  DB* db_;
444
  long num_;
445 446 447
  int value_size_;
  int entries_per_batch_;
  WriteOptions write_options_;
448
  long reads_;
449
  long writes_;
450
  long readwrites_;
J
jorlow@chromium.org 已提交
451 452
  int heap_counter_;

453 454 455 456 457 458 459
  void PrintHeader() {
    const int kKeySize = 16;
    PrintEnvironment();
    fprintf(stdout, "Keys:       %d bytes each\n", kKeySize);
    fprintf(stdout, "Values:     %d bytes each (%d bytes after compression)\n",
            FLAGS_value_size,
            static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5));
460
    fprintf(stdout, "Entries:    %ld\n", num_);
461
    fprintf(stdout, "RawSize:    %.1f MB (estimated)\n",
462 463
            ((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_)
             / 1048576.0));
464 465 466
    fprintf(stdout, "FileSize:   %.1f MB (estimated)\n",
            (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_)
             / 1048576.0));
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482

    switch (FLAGS_compression_type) {
      case leveldb::kNoCompression:
        fprintf(stdout, "Compression: none\n");
        break;
      case leveldb::kSnappyCompression:
        fprintf(stdout, "Compression: snappy\n");
        break;
      case leveldb::kZlibCompression:
        fprintf(stdout, "Compression: zlib\n");
        break;
      case leveldb::kBZip2Compression:
        fprintf(stdout, "Compression: bzip2\n");
        break;
    }

483 484 485 486 487 488 489 490 491 492 493 494 495 496
    PrintWarnings();
    fprintf(stdout, "------------------------------------------------\n");
  }

  void PrintWarnings() {
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
    fprintf(stdout,
            "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"
            );
#endif
#ifndef NDEBUG
    fprintf(stdout,
            "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
#endif
497

498 499 500 501 502 503 504 505 506 507 508 509 510
    if (FLAGS_compression_type != leveldb::kNoCompression) {
      // The test string should not be too small.
      const int len = FLAGS_block_size;
      char* text = (char*) malloc(len+1);
      bool result = true;
      const char* name = NULL;
      std::string compressed;

      memset(text, (int) 'y', len);
      text[len] = '\0';

      switch (FLAGS_compression_type) {
        case kSnappyCompression:
511 512
          result = port::Snappy_Compress(Options().compression_opts, text,
                                         strlen(text), &compressed);
513 514 515
          name = "Snappy";
          break;
        case kZlibCompression:
516 517
          result = port::Zlib_Compress(Options().compression_opts, text,
                                       strlen(text), &compressed);
518 519 520
          name = "Zlib";
          break;
        case kBZip2Compression:
521 522
          result = port::BZip2_Compress(Options().compression_opts, text,
                                        strlen(text), &compressed);
523 524
          name = "BZip2";
          break;
525 526 527
        case kNoCompression:
          assert(false); // cannot happen
          break;
528 529 530 531 532 533 534 535 536
      }

      if (!result) {
        fprintf(stdout, "WARNING: %s compression is not enabled\n", name);
      } else if (name && compressed.size() >= strlen(text)) {
        fprintf(stdout, "WARNING: %s compression is not effective\n", name);
      }

      free(text);
537
    }
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574
  }

  // Writes the LevelDB version to stderr.  On Linux it additionally writes
  // the current date and, when /proc/cpuinfo can be opened, the CPU count,
  // CPU model and cache size parsed from its "key : value" lines.
  void PrintEnvironment() {
    fprintf(stderr, "LevelDB:    version %d.%d\n",
            kMajorVersion, kMinorVersion);

#if defined(__linux)
    time_t now = time(NULL);
    fprintf(stderr, "Date:       %s", ctime(&now));  // ctime() adds newline

    FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
    if (cpuinfo != NULL) {
      char line[1000];
      int num_cpus = 0;
      std::string cpu_type;
      std::string cache_size;
      while (fgets(line, sizeof(line), cpuinfo) != NULL) {
        const char* sep = strchr(line, ':');
        if (sep == NULL) {
          continue;
        }
        // The key is everything before the ':'; TrimSpace() removes the
        // padding.  Using (sep - line) keeps the length non-negative even
        // when the line starts with ':' and does not drop the last key
        // character when no padding precedes the ':'.
        Slice key = TrimSpace(Slice(line, sep - line));
        Slice val = TrimSpace(Slice(sep + 1));
        if (key == "model name") {
          // One "model name" entry is counted per CPU.
          ++num_cpus;
          cpu_type = val.ToString();
        } else if (key == "cache size") {
          cache_size = val.ToString();
        }
      }
      fclose(cpuinfo);
      fprintf(stderr, "CPU:        %d * %s\n", num_cpus, cpu_type.c_str());
      fprintf(stderr, "CPUCache:   %s\n", cache_size.c_str());
    }
#endif
  }

575 576
  void PrintStatistics() {
    if (FLAGS_statistics) {
A
Abhishek Kona 已提交
577 578
      fprintf(stdout, "File opened:%ld closed:%ld errors:%ld\n"
          "Block Cache Hit Count:%ld Block Cache Miss Count:%ld\n",
579 580
              dbstats->getNumFileOpens(),
              dbstats->getNumFileCloses(),
A
Abhishek Kona 已提交
581 582 583
              dbstats->getNumFileErrors(),
          dbstats->getTickerCount(BLOCK_CACHE_HIT),
          dbstats->getTickerCount(BLOCK_CACHE_MISS));
584 585 586
    }
  }

J
jorlow@chromium.org 已提交
587
 public:
588
  Benchmark()
589 590 591
  : cache_(FLAGS_cache_size >= 0 ?
           (FLAGS_cache_numshardbits >= 1 ?
            NewLRUCache(FLAGS_cache_size, FLAGS_cache_numshardbits) :
592
            NewLRUCache(FLAGS_cache_size)) : NULL),
S
Sanjay Ghemawat 已提交
593 594 595
    filter_policy_(FLAGS_bloom_bits >= 0
                   ? NewBloomFilterPolicy(FLAGS_bloom_bits)
                   : NULL),
596 597
    db_(NULL),
    num_(FLAGS_num),
598 599
    value_size_(FLAGS_value_size),
    entries_per_batch_(1),
600
    reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
601
    writes_(FLAGS_writes < 0 ? FLAGS_num : FLAGS_writes),
602
    readwrites_((FLAGS_writes < 0  && FLAGS_reads < 0)? FLAGS_num :
603
                ((FLAGS_writes > FLAGS_reads) ? FLAGS_writes : FLAGS_reads)
604
               ),
605
    heap_counter_(0) {
J
jorlow@chromium.org 已提交
606
    std::vector<std::string> files;
607
    FLAGS_env->GetChildren(FLAGS_db, &files);
608
    for (unsigned int i = 0; i < files.size(); i++) {
J
jorlow@chromium.org 已提交
609
      if (Slice(files[i]).starts_with("heap-")) {
610
        FLAGS_env->DeleteFile(std::string(FLAGS_db) + "/" + files[i]);
J
jorlow@chromium.org 已提交
611 612
      }
    }
613
    if (!FLAGS_use_existing_db) {
614
      DestroyDB(FLAGS_db, Options());
615
    }
J
jorlow@chromium.org 已提交
616 617 618 619 620
  }

  ~Benchmark() {
    delete db_;
    delete cache_;
S
Sanjay Ghemawat 已提交
621
    delete filter_policy_;
J
jorlow@chromium.org 已提交
622 623 624
  }

  void Run() {
625 626
    PrintHeader();
    Open();
J
jorlow@chromium.org 已提交
627 628 629 630 631 632 633 634 635 636 637 638 639

    const char* benchmarks = FLAGS_benchmarks;
    while (benchmarks != NULL) {
      const char* sep = strchr(benchmarks, ',');
      Slice name;
      if (sep == NULL) {
        name = benchmarks;
        benchmarks = NULL;
      } else {
        name = Slice(benchmarks, sep - benchmarks);
        benchmarks = sep + 1;
      }

640 641
      // Reset parameters that may be overridden below
      num_ = FLAGS_num;
642
      reads_ = (FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads);
643
      writes_ = (FLAGS_writes < 0 ? FLAGS_num : FLAGS_writes);
644 645 646
      value_size_ = FLAGS_value_size;
      entries_per_batch_ = 1;
      write_options_ = WriteOptions();
647 648 649
      if (FLAGS_sync) {
        write_options_.sync = true;
      }
650

H
heyongqiang 已提交
651 652
      write_options_.disableWAL = FLAGS_disable_wal;

653 654
      void (Benchmark::*method)(ThreadState*) = NULL;
      bool fresh_db = false;
655
      int num_threads = FLAGS_threads;
656 657

      if (name == Slice("fillseq")) {
658 659
        fresh_db = true;
        method = &Benchmark::WriteSeq;
660
      } else if (name == Slice("fillbatch")) {
661 662 663
        fresh_db = true;
        entries_per_batch_ = 1000;
        method = &Benchmark::WriteSeq;
664
      } else if (name == Slice("fillrandom")) {
665 666
        fresh_db = true;
        method = &Benchmark::WriteRandom;
667
      } else if (name == Slice("overwrite")) {
668 669
        fresh_db = false;
        method = &Benchmark::WriteRandom;
670
      } else if (name == Slice("fillsync")) {
671 672 673 674
        fresh_db = true;
        num_ /= 1000;
        write_options_.sync = true;
        method = &Benchmark::WriteRandom;
675
      } else if (name == Slice("fill100K")) {
676 677 678 679
        fresh_db = true;
        num_ /= 1000;
        value_size_ = 100 * 1000;
        method = &Benchmark::WriteRandom;
J
jorlow@chromium.org 已提交
680
      } else if (name == Slice("readseq")) {
681
        method = &Benchmark::ReadSequential;
J
jorlow@chromium.org 已提交
682
      } else if (name == Slice("readreverse")) {
683
        method = &Benchmark::ReadReverse;
J
jorlow@chromium.org 已提交
684
      } else if (name == Slice("readrandom")) {
685
        method = &Benchmark::ReadRandom;
S
Sanjay Ghemawat 已提交
686 687 688 689
      } else if (name == Slice("readmissing")) {
        method = &Benchmark::ReadMissing;
      } else if (name == Slice("seekrandom")) {
        method = &Benchmark::SeekRandom;
690
      } else if (name == Slice("readhot")) {
691
        method = &Benchmark::ReadHot;
692
      } else if (name == Slice("readrandomsmall")) {
693
        reads_ /= 1000;
694
        method = &Benchmark::ReadRandom;
S
Sanjay Ghemawat 已提交
695 696 697 698
      } else if (name == Slice("deleteseq")) {
        method = &Benchmark::DeleteSeq;
      } else if (name == Slice("deleterandom")) {
        method = &Benchmark::DeleteRandom;
699 700 701
      } else if (name == Slice("readwhilewriting")) {
        num_threads++;  // Add extra thread for writing
        method = &Benchmark::ReadWhileWriting;
702 703
      } else if (name == Slice("readrandomwriterandom")) {
        method = &Benchmark::ReadRandomWriteRandom;
J
jorlow@chromium.org 已提交
704
      } else if (name == Slice("compact")) {
705
        method = &Benchmark::Compact;
J
jorlow@chromium.org 已提交
706
      } else if (name == Slice("crc32c")) {
707
        method = &Benchmark::Crc32c;
708
      } else if (name == Slice("acquireload")) {
709
        method = &Benchmark::AcquireLoad;
710
      } else if (name == Slice("snappycomp")) {
711
        method = &Benchmark::SnappyCompress;
712
      } else if (name == Slice("snappyuncomp")) {
713
        method = &Benchmark::SnappyUncompress;
J
jorlow@chromium.org 已提交
714 715
      } else if (name == Slice("heapprofile")) {
        HeapProfile();
716
      } else if (name == Slice("stats")) {
S
Sanjay Ghemawat 已提交
717 718 719
        PrintStats("leveldb.stats");
      } else if (name == Slice("sstables")) {
        PrintStats("leveldb.sstables");
J
jorlow@chromium.org 已提交
720
      } else {
721 722 723 724
        if (name != Slice()) {  // No error message for empty name
          fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
        }
      }
725 726 727 728 729 730 731 732 733 734 735 736 737 738 739

      if (fresh_db) {
        if (FLAGS_use_existing_db) {
          fprintf(stdout, "%-12s : skipped (--use_existing_db is true)\n",
                  name.ToString().c_str());
          method = NULL;
        } else {
          delete db_;
          db_ = NULL;
          DestroyDB(FLAGS_db, Options());
          Open();
        }
      }

      if (method != NULL) {
740
        RunBenchmark(num_threads, name, method);
J
jorlow@chromium.org 已提交
741 742
      }
    }
743
    PrintStatistics();
J
jorlow@chromium.org 已提交
744 745
  }

746
 private:
747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768
  // Everything one benchmark thread needs; a pointer to it is handed to
  // ThreadBody() as its void* argument.
  struct ThreadArg {
    Benchmark* bm;        // Benchmark object the method is invoked on
    SharedState* shared;  // Start/finish rendezvous state for all threads
    ThreadState* thread;  // This thread's private state
    void (Benchmark::*method)(ThreadState*);  // Benchmark member to run
  };

  static void ThreadBody(void* v) {
    ThreadArg* arg = reinterpret_cast<ThreadArg*>(v);
    SharedState* shared = arg->shared;
    ThreadState* thread = arg->thread;
    {
      MutexLock l(&shared->mu);
      shared->num_initialized++;
      if (shared->num_initialized >= shared->total) {
        shared->cv.SignalAll();
      }
      while (!shared->start) {
        shared->cv.Wait();
      }
    }

769
    thread->stats.Start(thread->tid);
770 771 772 773 774 775 776 777 778 779 780 781
    (arg->bm->*(arg->method))(thread);
    thread->stats.Stop();

    {
      MutexLock l(&shared->mu);
      shared->num_done++;
      if (shared->num_done >= shared->total) {
        shared->cv.SignalAll();
      }
    }
  }

782 783
  // Runs method on n threads at once: starts the threads, waits until all
  // have checked in, releases them together, waits until all are done,
  // then merges their stats into thread 0's and reports them under name.
  void RunBenchmark(int n, Slice name,
                    void (Benchmark::*method)(ThreadState*)) {
    SharedState shared;
    shared.total = n;
    shared.num_initialized = 0;
    shared.num_done = 0;
    shared.start = false;

    ThreadArg* const workers = new ThreadArg[n];
    for (int t = 0; t < n; t++) {
      ThreadArg& w = workers[t];
      w.bm = this;
      w.method = method;
      w.shared = &shared;
      w.thread = new ThreadState(t);
      w.thread->shared = &shared;
      FLAGS_env->StartThread(ThreadBody, &w);
    }

    shared.mu.Lock();
    // Phase 1: every thread has checked in.
    while (shared.num_initialized < n) {
      shared.cv.Wait();
    }

    // Phase 2: release them all, then wait for the last one to finish.
    shared.start = true;
    shared.cv.SignalAll();
    while (shared.num_done < n) {
      shared.cv.Wait();
    }
    shared.mu.Unlock();

    Stats& combined = workers[0].thread->stats;
    for (int t = 1; t < n; t++) {
      combined.Merge(workers[t].thread->stats);
    }
    combined.Report(name);

    for (int t = 0; t < n; t++) {
      delete workers[t].thread;
    }
    delete[] workers;
  }

  // Benchmarks crc32c::Value() by checksumming the same 4096-byte buffer
  // until about 500MB have been processed; each checksum is one op.
  void Crc32c(ThreadState* thread) {
    const int kBlockSize = 4096;
    const int64_t kTotalBytes = 500 * 1048576;
    const std::string block(kBlockSize, 'x');

    uint32_t crc = 0;
    int64_t processed = 0;
    for (; processed < kTotalBytes; processed += kBlockSize) {
      crc = crc32c::Value(block.data(), kBlockSize);
      thread->stats.FinishedSingleOp(NULL);
    }
    // Print so result is not dead
    fprintf(stderr, "... crc=0x%x\r", static_cast<unsigned int>(crc));

    thread->stats.AddBytes(processed);
    thread->stats.AddMessage("(4K per op)");
  }

842
  // Benchmarks port::AtomicPointer::Acquire_Load(): 100000 ops, each made
  // of 1000 loads from the same pointer.
  void AcquireLoad(ThreadState* thread) {
    int dummy;
    port::AtomicPointer ap(&dummy);
    void* last = NULL;
    thread->stats.AddMessage("(each op is 1000 loads)");
    for (int op = 0; op < 100000; op++) {
      for (int i = 0; i < 1000; i++) {
        last = ap.Acquire_Load();
      }
      thread->stats.FinishedSingleOp(NULL);
    }
    if (last == NULL) exit(1); // Disable unused variable warning.
  }

858 859 860
  // Benchmarks port::Snappy_Compress() by compressing one generated block
  // of Options().block_size bytes repeatedly until 1G of input has been
  // consumed or a call fails.  On success the output/input ratio is added
  // to the report as a percentage.
  void SnappyCompress(ThreadState* thread) {
    RandomGenerator gen;
    Slice input = gen.Generate(Options().block_size);
    const int64_t kTargetBytes = 1024 * 1048576;  // Compress 1G

    std::string compressed;
    int64_t consumed = 0;
    int64_t produced = 0;
    bool ok = true;
    while (ok && consumed < kTargetBytes) {
      ok = port::Snappy_Compress(Options().compression_opts, input.data(),
                                 input.size(), &compressed);
      produced += compressed.size();
      consumed += input.size();
      thread->stats.FinishedSingleOp(NULL);
    }

    if (ok) {
      char buf[100];
      snprintf(buf, sizeof(buf), "(output: %.1f%%)",
               (produced * 100.0) / consumed);
      thread->stats.AddMessage(buf);
      thread->stats.AddBytes(consumed);
    } else {
      thread->stats.AddMessage("(snappy failure)");
    }
  }

884 885 886
  // Benchmarks port::Snappy_Uncompress(). One generated block of
  // Options().block_size bytes is compressed once up front, then uncompressed
  // repeatedly until 1G of output has been produced. Reports the bytes
  // produced on success, or "(snappy failure)" if any step fails.
  void SnappyUncompress(ThreadState* thread) {
    RandomGenerator gen;
    const Slice block = gen.Generate(Options().block_size);

    std::string packed;
    bool ok = port::Snappy_Compress(Options().compression_opts, block.data(),
                                    block.size(), &packed);

    const int64_t kTargetBytes = 1024 * 1048576;  // Uncompress 1G
    int64_t restored = 0;
    char* scratch = new char[block.size()];
    while (ok && restored < kTargetBytes) {
      ok = port::Snappy_Uncompress(packed.data(), packed.size(), scratch);
      restored += block.size();
      thread->stats.FinishedSingleOp(NULL);
    }
    delete[] scratch;

    if (ok) {
      thread->stats.AddBytes(restored);
    } else {
      thread->stats.AddMessage("(snappy failure)");
    }
  }

  void Open() {
    assert(db_ == NULL);
    Options options;
910
    options.create_if_missing = !FLAGS_use_existing_db;
911
    options.block_cache = cache_;
912 913 914
    if (cache_ == NULL) {
      options.no_block_cache = true;
    }
915
    options.write_buffer_size = FLAGS_write_buffer_size;
916
    options.block_size = FLAGS_block_size;
S
Sanjay Ghemawat 已提交
917
    options.filter_policy = filter_policy_;
918 919
    options.max_open_files = FLAGS_open_files;
    options.statistics = dbstats;
920
    options.env = FLAGS_env;
H
heyongqiang 已提交
921
    options.disableDataSync = FLAGS_disable_data_sync;
922
    options.use_fsync = FLAGS_use_fsync;
923
    options.num_levels = FLAGS_num_levels;
H
heyongqiang 已提交
924 925 926 927 928
    options.target_file_size_base = FLAGS_target_file_size_base;
    options.target_file_size_multiplier = FLAGS_target_file_size_multiplier;
    options.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base;
    options.max_bytes_for_level_multiplier =
        FLAGS_max_bytes_for_level_multiplier;
H
heyongqiang 已提交
929
    options.level0_stop_writes_trigger = FLAGS_level0_stop_writes_trigger;
M
Mark Callaghan 已提交
930 931
    options.level0_file_num_compaction_trigger =
        FLAGS_level0_file_num_compaction_trigger;
H
heyongqiang 已提交
932 933
    options.level0_slowdown_writes_trigger =
      FLAGS_level0_slowdown_writes_trigger;
934
    options.compression = FLAGS_compression_type;
935 936 937 938 939 940
    if (FLAGS_min_level_to_compress >= 0) {
      assert(FLAGS_min_level_to_compress <= FLAGS_num_levels);
      options.compression_per_level = new CompressionType[FLAGS_num_levels];
      for (unsigned int i = 0; i < FLAGS_min_level_to_compress; i++) {
        options.compression_per_level[i] = kNoCompression;
      }
941
      for (unsigned int i = FLAGS_min_level_to_compress;
942 943 944 945
           i < FLAGS_num_levels; i++) {
        options.compression_per_level[i] = FLAGS_compression_type;
      }
    }
946
    options.disable_seek_compaction = FLAGS_disable_seek_compaction;
947 948
    options.delete_obsolete_files_period_micros =
      FLAGS_delete_obsolete_files_period_micros;
949
    options.rate_limit = FLAGS_rate_limit;
950
    options.table_cache_numshardbits = FLAGS_table_cache_numshardbits;
H
heyongqiang 已提交
951 952 953 954 955 956
    Status s;
    if(FLAGS_read_only) {
      s = DB::OpenForReadOnly(options, FLAGS_db, &db_);
    } else {
      s = DB::Open(options, FLAGS_db, &db_);
    }
957 958 959 960
    if (!s.ok()) {
      fprintf(stderr, "open error: %s\n", s.ToString().c_str());
      exit(1);
    }
961 962 963
    if (FLAGS_min_level_to_compress >= 0) {
      delete options.compression_per_level;
    }
964 965
  }

966 967 968
  // Write benchmark entry point that uses sequential keys.
  void WriteSeq(ThreadState* thread) {
    const bool sequential = true;
    DoWrite(thread, sequential);
  }

  // Write benchmark entry point that uses randomly chosen keys.
  void WriteRandom(ThreadState* thread) {
    const bool sequential = false;
    DoWrite(thread, sequential);
  }

  // Writes writes_ entries in batches of entries_per_batch_.
  // Keys are 16-digit zero-padded decimals: consecutive when `seq` is true,
  // otherwise drawn at random from [0, FLAGS_num). Values are value_size_
  // bytes from a RandomGenerator. Exits the process on a failed write.
  void DoWrite(ThreadState* thread, bool seq) {
    if (num_ != FLAGS_num) {
      char note[100];
      snprintf(note, sizeof(note), "(%ld ops)", num_);
      thread->stats.AddMessage(note);
    }

    RandomGenerator gen;
    WriteBatch batch;
    Status status;
    int64_t written = 0;
    for (int base = 0; base < writes_; base += entries_per_batch_) {
      batch.Clear();
      for (int off = 0; off < entries_per_batch_; off++) {
        const int k = seq ? base + off : (thread->rand.Next() % FLAGS_num);
        char key[100];
        snprintf(key, sizeof(key), "%016d", k);
        batch.Put(key, gen.Generate(value_size_));
        written += value_size_ + strlen(key);
        thread->stats.FinishedSingleOp(db_);
      }
      status = db_->Write(write_options_, &batch);
      if (!status.ok()) {
        fprintf(stderr, "put error: %s\n", status.ToString().c_str());
        exit(1);
      }
    }
    thread->stats.AddBytes(written);
  }

  // Walks the database forward from the first entry, visiting at most reads_
  // entries, and reports the total key+value bytes seen.
  void ReadSequential(ThreadState* thread) {
    Iterator* it = db_->NewIterator(ReadOptions(FLAGS_verify_checksum, true));
    int64_t bytes = 0;
    long visited = 0;
    it->SeekToFirst();
    while (visited < reads_ && it->Valid()) {
      bytes += it->key().size() + it->value().size();
      thread->stats.FinishedSingleOp(db_);
      ++visited;
      it->Next();
    }
    delete it;
    thread->stats.AddBytes(bytes);
  }

  // Walks the database backward from the last entry, visiting at most reads_
  // entries, and reports the total key+value bytes seen.
  void ReadReverse(ThreadState* thread) {
    Iterator* it = db_->NewIterator(ReadOptions(FLAGS_verify_checksum, true));
    int64_t bytes = 0;
    long visited = 0;
    it->SeekToLast();
    while (visited < reads_ && it->Valid()) {
      bytes += it->key().size() + it->value().size();
      thread->stats.FinishedSingleOp(db_);
      ++visited;
      it->Prev();
    }
    delete it;
    thread->stats.AddBytes(bytes);
  }

  // Performs reads_ point lookups on keys drawn at random from [0, FLAGS_num)
  // and reports how many of those lookups found a value.
  void ReadRandom(ThreadState* thread) {
    ReadOptions options(FLAGS_verify_checksum, true);
    std::string value;
    long found = 0;
    for (long i = 0; i < reads_; i++) {
      char key[100];
      const int k = thread->rand.Next() % FLAGS_num;
      snprintf(key, sizeof(key), "%016d", k);
      if (db_->Get(options, key, &value).ok()) {
        found++;
      }
      thread->stats.FinishedSingleOp(db_);
    }
    char msg[100];
    // The denominator is the number of lookups actually issued (reads_), not
    // num_, so the summary is accurate when the two differ.
    snprintf(msg, sizeof(msg), "(%ld of %ld found)", found,
             static_cast<long>(reads_));
    thread->stats.AddMessage(msg);
  }

  // Performs reads_ point lookups on keys formatted as "%016d." -- the same
  // 16-digit key DoWrite() produces, plus a trailing '.'. The lookup result
  // is ignored.
  void ReadMissing(ThreadState* thread) {
    ReadOptions read_opts(FLAGS_verify_checksum, true);
    std::string scratch;
    for (long n = 0; n < reads_; n++) {
      char key[100];
      const int k = thread->rand.Next() % FLAGS_num;
      snprintf(key, sizeof(key), "%016d.", k);
      db_->Get(read_opts, key, &scratch);
      thread->stats.FinishedSingleOp(db_);
    }
  }

  // Performs reads_ point lookups restricted to the first
  // ceil(FLAGS_num / 100) keys, i.e. a 1% slice of the key space.
  // The lookup result is ignored.
  void ReadHot(ThreadState* thread) {
    ReadOptions read_opts(FLAGS_verify_checksum, true);
    std::string scratch;
    const long range = (FLAGS_num + 99) / 100;
    for (long n = 0; n < reads_; n++) {
      char key[100];
      const int k = thread->rand.Next() % range;
      snprintf(key, sizeof(key), "%016d", k);
      db_->Get(read_opts, key, &scratch);
      thread->stats.FinishedSingleOp(db_);
    }
  }

  // Performs reads_ seeks, each on a fresh iterator, to keys drawn at random
  // from [0, FLAGS_num). A seek counts as "found" when the iterator lands
  // exactly on the requested key.
  void SeekRandom(ThreadState* thread) {
    ReadOptions options(FLAGS_verify_checksum, true);
    long found = 0;
    for (long i = 0; i < reads_; i++) {
      Iterator* iter = db_->NewIterator(options);
      char key[100];
      const int k = thread->rand.Next() % FLAGS_num;
      snprintf(key, sizeof(key), "%016d", k);
      iter->Seek(key);
      if (iter->Valid() && iter->key() == key) found++;
      delete iter;
      thread->stats.FinishedSingleOp(db_);
    }
    char msg[100];
    // The denominator is the number of seeks actually issued (reads_), not
    // num_, so the summary is accurate when the two differ.
    snprintf(msg, sizeof(msg), "(%ld of %ld found)", found,
             static_cast<long>(reads_));
    thread->stats.AddMessage(msg);
  }

  // Deletes num_ keys in batches of entries_per_batch_.
  // Keys are 16-digit zero-padded decimals: consecutive when `seq` is true,
  // otherwise drawn at random from [0, FLAGS_num). Exits the process on a
  // failed write.
  void DoDelete(ThreadState* thread, bool seq) {
    WriteBatch batch;
    Status s;
    for (int i = 0; i < num_; i += entries_per_batch_) {
      batch.Clear();
      for (int j = 0; j < entries_per_batch_; j++) {
        const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num);
        char key[100];
        snprintf(key, sizeof(key), "%016d", k);
        batch.Delete(key);
        thread->stats.FinishedSingleOp(db_);
      }
      s = db_->Write(write_options_, &batch);
      if (!s.ok()) {
        fprintf(stderr, "del error: %s\n", s.ToString().c_str());
        exit(1);
      }
    }
  }

  // Delete benchmark entry point that uses sequential keys.
  void DeleteSeq(ThreadState* thread) {
    const bool sequential = true;
    DoDelete(thread, sequential);
  }

  // Delete benchmark entry point that uses randomly chosen keys.
  void DeleteRandom(ThreadState* thread) {
    const bool sequential = false;
    DoDelete(thread, sequential);
  }

  // Threads with tid > 0 run ReadRandom(). Thread 0 instead keeps issuing
  // random Puts until every other initialized thread has reported done, then
  // restarts its own stats so its writes are excluded from the report.
  void ReadWhileWriting(ThreadState* thread) {
    if (thread->tid > 0) {
      ReadRandom(thread);
      return;
    }

    // Special thread that keeps writing until other threads are done.
    RandomGenerator gen;
    for (;;) {
      bool others_finished;
      {
        MutexLock l(&thread->shared->mu);
        others_finished =
            thread->shared->num_done + 1 >= thread->shared->num_initialized;
      }
      if (others_finished) {
        break;
      }

      const int k = thread->rand.Next() % FLAGS_num;
      char key[100];
      snprintf(key, sizeof(key), "%016d", k);
      Status put_status =
          db_->Put(write_options_, key, gen.Generate(value_size_));
      if (!put_status.ok()) {
        fprintf(stderr, "put error: %s\n", put_status.ToString().c_str());
        exit(1);
      }
    }

    // Do not count any of the preceding work/delay in stats.
    thread->stats.Start(thread->tid);
  }

  //
  // This is diffferent from ReadWhileWriting because it does not use
1153
  // an extra thread.
1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164
  //
  void ReadRandomWriteRandom(ThreadState* thread) {
    ReadOptions options(FLAGS_verify_checksum, true);
    RandomGenerator gen;
    std::string value;
    long found = 0;
    int get_weight = 0;
    int put_weight = 0;
    long reads_done = 0;
    long writes_done = 0;
    // the number of iterations is the larger of read_ or write_
1165
    for (long i = 0; i < readwrites_; i++) {
1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191
      char key[100];
      const int k = thread->rand.Next() % FLAGS_num;
      snprintf(key, sizeof(key), "%016d", k);
      if (get_weight == 0 && put_weight == 0) {
        // one batch complated, reinitialize for next batch
        get_weight = FLAGS_readwritepercent;
        put_weight = 100 - get_weight;
      }
      if (get_weight > 0) {
        // do all the gets first
        if (db_->Get(options, key, &value).ok()) {
          found++;
        }
        get_weight--;
        reads_done++;
      } else  if (put_weight > 0) {
        // then do all the corresponding number of puts
        // for all the gets we have done earlier
        Status s = db_->Put(write_options_, key, gen.Generate(value_size_));
        if (!s.ok()) {
          fprintf(stderr, "put error: %s\n", s.ToString().c_str());
          exit(1);
        }
        put_weight--;
        writes_done++;
      }
M
Mark Callaghan 已提交
1192
      thread->stats.FinishedSingleOp(db_);
1193 1194
    }
    char msg[100];
1195
    snprintf(msg, sizeof(msg), "( reads:%ld writes:%ld total:%ld )",
1196
             reads_done, writes_done, readwrites_);
1197 1198 1199
    thread->stats.AddMessage(msg);
  }

1200
  // Calls CompactRange() on db_ with NULL for both bounds.
  // `thread` is not used.
  void Compact(ThreadState* thread) {
    db_->CompactRange(NULL, NULL);
  }

  void PrintStats(const char* key) {
1205
    std::string stats;
S
Sanjay Ghemawat 已提交
1206
    if (!db_->GetProperty(key, &stats)) {
1207
      stats = "(failed)";
1208
    }
1209
    fprintf(stdout, "\n%s\n", stats.c_str());
1210 1211
  }

J
jorlow@chromium.org 已提交
1212 1213 1214 1215 1216 1217
  static void WriteToFile(void* arg, const char* buf, int n) {
    reinterpret_cast<WritableFile*>(arg)->Append(Slice(buf, n));
  }

  void HeapProfile() {
    char fname[100];
1218
    snprintf(fname, sizeof(fname), "%s/heap-%04d", FLAGS_db, ++heap_counter_);
J
jorlow@chromium.org 已提交
1219
    WritableFile* file;
1220
    Status s = FLAGS_env->NewWritableFile(fname, &file);
J
jorlow@chromium.org 已提交
1221
    if (!s.ok()) {
1222
      fprintf(stderr, "%s\n", s.ToString().c_str());
J
jorlow@chromium.org 已提交
1223 1224 1225 1226 1227
      return;
    }
    bool ok = port::GetHeapProfile(WriteToFile, file);
    delete file;
    if (!ok) {
1228
      fprintf(stderr, "heap profiling not supported\n");
1229
      FLAGS_env->DeleteFile(fname);
J
jorlow@chromium.org 已提交
1230 1231 1232 1233
    }
  }
};

}  // namespace leveldb

int main(int argc, char** argv) {
1237
  FLAGS_write_buffer_size = leveldb::Options().write_buffer_size;
1238
  FLAGS_open_files = leveldb::Options().max_open_files;
1239 1240
  // Compression test code above refers to FLAGS_block_size
  FLAGS_block_size = leveldb::Options().block_size;
H
heyongqiang 已提交
1241
  std::string default_db_path;
1242

J
jorlow@chromium.org 已提交
1243 1244 1245
  for (int i = 1; i < argc; i++) {
    double d;
    int n;
1246
    long l;
J
jorlow@chromium.org 已提交
1247
    char junk;
1248
    char hdfsname[2048];
J
jorlow@chromium.org 已提交
1249 1250 1251 1252 1253 1254 1255
    if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) {
      FLAGS_benchmarks = argv[i] + strlen("--benchmarks=");
    } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) {
      FLAGS_compression_ratio = d;
    } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      FLAGS_histogram = n;
1256 1257 1258
    } else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      FLAGS_use_existing_db = n;
1259 1260
    } else if (sscanf(argv[i], "--num=%ld%c", &l, &junk) == 1) {
      FLAGS_num = l;
1261 1262
    } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) {
      FLAGS_reads = n;
1263 1264
    } else if (sscanf(argv[i], "--threads=%d%c", &n, &junk) == 1) {
      FLAGS_threads = n;
J
jorlow@chromium.org 已提交
1265 1266 1267 1268
    } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) {
      FLAGS_value_size = n;
    } else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) {
      FLAGS_write_buffer_size = n;
1269 1270
    } else if (sscanf(argv[i], "--cache_size=%ld%c", &l, &junk) == 1) {
      FLAGS_cache_size = l;
1271 1272
    } else if (sscanf(argv[i], "--block_size=%d%c", &n, &junk) == 1) {
      FLAGS_block_size = n;
1273
    } else if (sscanf(argv[i], "--cache_numshardbits=%d%c", &n, &junk) == 1) {
1274 1275 1276 1277 1278 1279
      if (n < 20) {
        FLAGS_cache_numshardbits = n;
      } else {
        fprintf(stderr, "The cache cannot be sharded into 2**%d pieces\n", n);
        exit(1);
      }
1280 1281 1282 1283 1284 1285 1286
    } else if (sscanf(argv[i], "--table_cache_numshardbits=%d%c",
		      &n, &junk) == 1) {
      if (n <= 0 || n > 20) {
        fprintf(stderr, "The cache cannot be sharded into 2**%d pieces\n", n);
        exit(1);
      }
      FLAGS_table_cache_numshardbits = n;
S
Sanjay Ghemawat 已提交
1287 1288
    } else if (sscanf(argv[i], "--bloom_bits=%d%c", &n, &junk) == 1) {
      FLAGS_bloom_bits = n;
1289 1290
    } else if (sscanf(argv[i], "--open_files=%d%c", &n, &junk) == 1) {
      FLAGS_open_files = n;
1291 1292
    } else if (strncmp(argv[i], "--db=", 5) == 0) {
      FLAGS_db = argv[i] + 5;
1293 1294 1295
    } else if (sscanf(argv[i], "--verify_checksum=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      FLAGS_verify_checksum = n;
1296 1297 1298
    } else if (sscanf(argv[i], "--bufferedio=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      useOsBuffer = n;
1299 1300 1301
    } else if (sscanf(argv[i], "--mmap_read=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      useMmapRead = n;
1302 1303 1304
    } else if (sscanf(argv[i], "--mmap_write=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      useMmapWrite = n;
1305
    } else if (sscanf(argv[i], "--readahead=%d%c", &n, &junk) == 1 &&
1306 1307
               (n == 0 || n == 1)) {
      useFsReadAhead = n;
1308 1309 1310 1311 1312 1313
    } else if (sscanf(argv[i], "--statistics=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      if (n == 1) {
        dbstats = new leveldb::DBStatistics();
        FLAGS_statistics = true;
      }
1314 1315 1316 1317 1318
    } else if (sscanf(argv[i], "--writes=%d%c", &n, &junk) == 1) {
      FLAGS_writes = n;
    } else if (sscanf(argv[i], "--sync=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      FLAGS_sync = n;
1319
    } else if (sscanf(argv[i], "--readwritepercent=%d%c", &n, &junk) == 1 &&
1320
               n > 0 && n < 100) {
1321
      FLAGS_readwritepercent = n;
H
heyongqiang 已提交
1322 1323 1324
    } else if (sscanf(argv[i], "--disable_data_sync=%d%c", &n, &junk) == 1 &&
        (n == 0 || n == 1)) {
      FLAGS_disable_data_sync = n;
1325 1326 1327
    } else if (sscanf(argv[i], "--use_fsync=%d%c", &n, &junk) == 1 &&
        (n == 0 || n == 1)) {
      FLAGS_use_fsync = n;
H
heyongqiang 已提交
1328
    } else if (sscanf(argv[i], "--disable_wal=%d%c", &n, &junk) == 1 &&
1329
        (n == 0 || n == 1)) {
H
heyongqiang 已提交
1330
      FLAGS_disable_wal = n;
1331
    } else if (sscanf(argv[i], "--hdfs=%s", hdfsname) == 1) {
1332
      FLAGS_env  = new leveldb::HdfsEnv(hdfsname);
1333 1334 1335
    } else if (sscanf(argv[i], "--num_levels=%d%c",
        &n, &junk) == 1) {
      FLAGS_num_levels = n;
H
heyongqiang 已提交
1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347
    } else if (sscanf(argv[i], "--target_file_size_base=%d%c",
        &n, &junk) == 1) {
      FLAGS_target_file_size_base = n;
    } else if ( sscanf(argv[i], "--target_file_size_multiplier=%d%c",
        &n, &junk) == 1) {
      FLAGS_target_file_size_multiplier = n;
    } else if (
        sscanf(argv[i], "--max_bytes_for_level_base=%d%c", &n, &junk) == 1) {
      FLAGS_max_bytes_for_level_base = n;
    } else if (sscanf(argv[i], "--max_bytes_for_level_multiplier=%d%c",
        &n, &junk) == 1) {
      FLAGS_max_bytes_for_level_multiplier = n;
H
heyongqiang 已提交
1348 1349 1350 1351 1352 1353
    } else if (sscanf(argv[i],"--level0_stop_writes_trigger=%d%c",
        &n, &junk) == 1) {
      FLAGS_level0_stop_writes_trigger = n;
    } else if (sscanf(argv[i],"--level0_slowdown_writes_trigger=%d%c",
        &n, &junk) == 1) {
      FLAGS_level0_slowdown_writes_trigger = n;
M
Mark Callaghan 已提交
1354 1355 1356
    } else if (sscanf(argv[i],"--level0_file_num_compaction_trigger=%d%c",
        &n, &junk) == 1) {
      FLAGS_level0_file_num_compaction_trigger = n;
1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369
    } else if (strncmp(argv[i], "--compression_type=", 19) == 0) {
      const char* ctype = argv[i] + 19;
      if (!strcasecmp(ctype, "none"))
        FLAGS_compression_type = leveldb::kNoCompression;
      else if (!strcasecmp(ctype, "snappy"))
        FLAGS_compression_type = leveldb::kSnappyCompression;
      else if (!strcasecmp(ctype, "zlib"))
        FLAGS_compression_type = leveldb::kZlibCompression;
      else if (!strcasecmp(ctype, "bzip2"))
        FLAGS_compression_type = leveldb::kBZip2Compression;
      else {
        fprintf(stdout, "Cannot parse %s\n", argv[i]);
      }
1370 1371
    } else if (sscanf(argv[i], "--min_level_to_compress=%d%c", &n, &junk) == 1
        && n >= 0) {
1372
      FLAGS_min_level_to_compress = n;
1373 1374 1375
    } else if (sscanf(argv[i], "--disable_seek_compaction=%d%c", &n, &junk) == 1
        && (n == 0 || n == 1)) {
      FLAGS_disable_seek_compaction = n;
1376 1377
    } else if (sscanf(argv[i], "--delete_obsolete_files_period_micros=%ld%c",
                      &l, &junk) == 1) {
1378
      FLAGS_delete_obsolete_files_period_micros = l;
1379 1380 1381
    } else if (sscanf(argv[i], "--stats_interval=%d%c", &n, &junk) == 1 &&
               n >= 0 && n < 2000000000) {
      FLAGS_stats_interval = n;
1382 1383 1384 1385 1386 1387
    } else if (sscanf(argv[i], "--stats_per_interval=%d%c", &n, &junk) == 1
        && (n == 0 || n == 1)) {
      FLAGS_stats_per_interval = n;
    } else if (sscanf(argv[i], "--rate_limit=%lf%c", &d, &junk) == 1 &&
               d > 1.0) {
      FLAGS_rate_limit = d;
H
heyongqiang 已提交
1388 1389 1390
    } else if (sscanf(argv[i], "--readonly=%d%c", &n, &junk) == 1 &&
        (n == 0 || n ==1 )) {
      FLAGS_read_only = n;
1391
    } else {
J
jorlow@chromium.org 已提交
1392 1393 1394 1395 1396
      fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
      exit(1);
    }
  }

H
heyongqiang 已提交
1397 1398 1399 1400 1401 1402 1403
  // Choose a location for the test database if none given with --db=<path>
  if (FLAGS_db == NULL) {
      leveldb::Env::Default()->GetTestDirectory(&default_db_path);
      default_db_path += "/dbbench";
      FLAGS_db = default_db_path.c_str();
  }

J
jorlow@chromium.org 已提交
1404 1405 1406 1407
  leveldb::Benchmark benchmark;
  benchmark.Run();
  return 0;
}