db_bench.cc 44.8 KB
Newer Older
J
jorlow@chromium.org 已提交
1 2 3 4 5 6 7 8 9
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include "db/db_impl.h"
#include "db/version_set.h"
10
#include "db/db_statistics.h"
11 12 13 14
#include "leveldb/cache.h"
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "leveldb/write_batch.h"
15
#include "leveldb/statistics.h"
J
jorlow@chromium.org 已提交
16 17
#include "port/port.h"
#include "util/crc32c.h"
J
jorlow@chromium.org 已提交
18
#include "util/histogram.h"
19
#include "util/mutexlock.h"
J
jorlow@chromium.org 已提交
20 21
#include "util/random.h"
#include "util/testutil.h"
22
#include "hdfs/env_hdfs.h"
J
jorlow@chromium.org 已提交
23 24 25

// Comma-separated list of operations to run in the specified order
//   Actual benchmarks:
26 27 28 29 30
//      fillseq       -- write N values in sequential key order in async mode
//      fillrandom    -- write N values in random key order in async mode
//      overwrite     -- overwrite N values in random key order in async mode
//      fillsync      -- write N/1000 values in random key order in sync mode
//      fill100K      -- write N/1000 100K values in random order in async mode
S
Sanjay Ghemawat 已提交
31 32
//      deleteseq     -- delete N keys in sequential order
//      deleterandom  -- delete N keys in random order
33 34 35
//      readseq       -- read N times sequentially
//      readreverse   -- read N times in reverse order
//      readrandom    -- read N times in random order
S
Sanjay Ghemawat 已提交
36
//      readmissing   -- read N missing keys in random order
37
//      readhot       -- read N times in random order from 1% section of DB
S
Sanjay Ghemawat 已提交
38
//      seekrandom    -- N random seeks
J
jorlow@chromium.org 已提交
39
//      crc32c        -- repeated crc32c of 4K of data
40
//      acquireload   -- load N*1000 times
J
jorlow@chromium.org 已提交
41 42
//   Meta operations:
//      compact     -- Compact the entire DB
43
//      stats       -- Print DB stats
S
Sanjay Ghemawat 已提交
44
//      sstables    -- Print sstable info
J
jorlow@chromium.org 已提交
45 46
//      heapprofile -- Dump a heap profile (if supported by this port)
static const char* FLAGS_benchmarks =
47
    "fillseq,"
J
jorlow@chromium.org 已提交
48
    "fillsync,"
49 50
    "fillrandom,"
    "overwrite,"
J
jorlow@chromium.org 已提交
51 52
    "readrandom,"
    "readrandom,"  // Extra run to allow previous compactions to quiesce
J
jorlow@chromium.org 已提交
53
    "readseq,"
J
jorlow@chromium.org 已提交
54
    "readreverse,"
J
jorlow@chromium.org 已提交
55
    "compact,"
J
jorlow@chromium.org 已提交
56
    "readrandom,"
J
jorlow@chromium.org 已提交
57
    "readseq,"
J
jorlow@chromium.org 已提交
58
    "readreverse,"
59
    "readrandomwriterandom," // mix reads and writes based on FLAGS_readwritepercent
J
jorlow@chromium.org 已提交
60 61
    "fill100K,"
    "crc32c,"
62 63
    "snappycomp,"
    "snappyuncomp,"
64
    "acquireload,"
J
jorlow@chromium.org 已提交
65
    ;
J
jorlow@chromium.org 已提交
66 67

// Number of key/values to place in database
68
static long FLAGS_num = 1000000;
J
jorlow@chromium.org 已提交
69

70
// Number of read operations to do.  If negative, do FLAGS_num reads.
71
static long FLAGS_reads = -1;
72

73 74 75
// Number of concurrent threads to run.
static int FLAGS_threads = 1;

J
jorlow@chromium.org 已提交
76 77 78 79 80
// Size of each value
static int FLAGS_value_size = 100;

// Arrange to generate values that shrink to this fraction of
// their original size after compression
81
static double FLAGS_compression_ratio = 0.5;
J
jorlow@chromium.org 已提交
82 83 84 85 86

// Print histogram of operation timings
static bool FLAGS_histogram = false;

// Number of bytes to buffer in memtable before compacting
87 88 89 90 91
// (initialized to default value by "main")
static int FLAGS_write_buffer_size = 0;

// Number of bytes to use as a cache of uncompressed data.
// Negative means use default settings.
D
Dhruba Borthakur 已提交
92
static long FLAGS_cache_size = -1;
J
jorlow@chromium.org 已提交
93

94 95 96
// Number of bytes in a block.
static int FLAGS_block_size = 0;

97 98 99
// Maximum number of files to keep open at the same time (use default if == 0)
static int FLAGS_open_files = 0;

S
Sanjay Ghemawat 已提交
100 101 102 103
// Bloom filter bits per key.
// Negative means use default settings.
static int FLAGS_bloom_bits = -1;

104 105 106 107 108
// If true, do not destroy the existing database.  If you set this
// flag and also specify a benchmark that wants a fresh database, that
// benchmark will fail.
static bool FLAGS_use_existing_db = false;

109
// Use the db with the following name.
H
heyongqiang 已提交
110
static const char* FLAGS_db = NULL;
111

112 113 114 115 116
// Number of shards for the block cache is 2 ** FLAGS_cache_numshardbits.
// Negative means use default settings. This is applied only
// if FLAGS_cache_size is non-negative.
static int FLAGS_cache_numshardbits = -1;

117 118 119
// Verify checksum for every block read from storage
static bool FLAGS_verify_checksum = false;

120 121 122 123
// Database statistics
static bool FLAGS_statistics = false;
static class leveldb::DBStatistics* dbstats = NULL;

124 125 126
// Number of write operations to do.  If negative, do FLAGS_num writes.
static long FLAGS_writes = -1;

H
heyongqiang 已提交
127 128
// These default values might change if the hardcoded

129 130 131
// Sync all writes to disk
static bool FLAGS_sync = false;

H
heyongqiang 已提交
132 133 134
// If true, do not wait until data is synced to disk.
static bool FLAGS_disable_data_sync = false;

135 136 137
// If true, issue fsync instead of fdatasync
static bool FLAGS_use_fsync = false;

H
heyongqiang 已提交
138 139 140
// If true, do not write WAL for write.
static bool FLAGS_disable_wal = false;

141
// The total number of levels
142
static unsigned int FLAGS_num_levels = 7;
143

H
heyongqiang 已提交
144 145 146 147 148 149
// Target level-0 file size for compaction
static int FLAGS_target_file_size_base = 2 * 1048576;

// A multiplier to compute target level-N file size
static int FLAGS_target_file_size_multiplier = 1;

150
// Max bytes for level-1
151
static uint64_t FLAGS_max_bytes_for_level_base = 10 * 1048576;
H
heyongqiang 已提交
152 153 154 155

// A multiplier to compute max bytes for level-N
static int FLAGS_max_bytes_for_level_multiplier = 10;

H
heyongqiang 已提交
156 157 158
// Number of files in level-0 that will trigger put stop.
static int FLAGS_level0_stop_writes_trigger = 12;

159 160
// Number of files in level-0 that will slow down writes.
static int FLAGS_level0_slowdown_writes_trigger = 8;
H
heyongqiang 已提交
161

M
Mark Callaghan 已提交
162 163 164
// Number of files in level-0 when compactions start
static int FLAGS_level0_file_num_compaction_trigger = 4;

165 166 167 168 169
// Ratio of reads to writes (expressed as a percentage)
// for the ReadRandomWriteRandom workload. The default
// setting is 9 gets for every 1 put.
static int FLAGS_readwritepercent = 90;

170 171 172
// Option to disable compaction triggered by read.
static int FLAGS_disable_seek_compaction = false;

173 174 175 176 177
// Option to delete obsolete files periodically
// Default: 0 which means that obsolete files are
// deleted after every compaction run.
static uint64_t FLAGS_delete_obsolete_files_period_micros = 0;

178 179 180 181
// Algorithm to use to compress the database
static enum leveldb::CompressionType FLAGS_compression_type =
    leveldb::kSnappyCompression;

182 183
// Allows compression for levels 0 and 1 to be disabled when
// other levels are compressed
184
static int FLAGS_min_level_to_compress = -1;
185

186 187
static int FLAGS_table_cache_numshardbits = 4;

188 189 190
// posix or hdfs environment
static leveldb::Env* FLAGS_env = leveldb::Env::Default();

191 192 193 194
// Stats are reported every N operations when this is greater
// than zero. When 0 the interval grows over time.
static int FLAGS_stats_interval = 0;

195 196 197 198 199 200 201 202 203
// Reports additional stats per interval when this is greater
// than 0.
static int FLAGS_stats_per_interval = 0;

// When not equal to 0 this makes threads sleep at each stats
// reporting interval until the compaction score for all levels is
// less than or equal to this value.
static double FLAGS_rate_limit = 0;

H
heyongqiang 已提交
204 205 206
// Run read only benchmarks.
static bool FLAGS_read_only = false;

207
extern bool useOsBuffer;
208
extern bool useFsReadAhead;
209
extern bool useMmapRead;
210
extern bool useMmapWrite;
211

J
jorlow@chromium.org 已提交
212 213
namespace leveldb {

214
// Helper for quickly generating random data.
J
jorlow@chromium.org 已提交
215 216 217
class RandomGenerator {
 private:
  std::string data_;
218
  unsigned int pos_;
J
jorlow@chromium.org 已提交
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243

 public:
  RandomGenerator() {
    // We use a limited amount of data over and over again and ensure
    // that it is larger than the compression window (32KB), and also
    // large enough to serve all typical value sizes we want to write.
    Random rnd(301);
    std::string piece;
    while (data_.size() < 1048576) {
      // Add a short fragment that is as compressible as specified
      // by FLAGS_compression_ratio.
      test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece);
      data_.append(piece);
    }
    pos_ = 0;
  }

  Slice Generate(int len) {
    if (pos_ + len > data_.size()) {
      pos_ = 0;
      assert(len < data_.size());
    }
    pos_ += len;
    return Slice(data_.data() + pos_ - len, len);
  }
244
};
245
// Returns the part of "s" that remains once leading and trailing whitespace
// (as classified by isspace()) has been dropped.  The result aliases the
// bytes of "s"; nothing is copied.  An all-whitespace or empty "s" yields an
// empty slice.
static Slice TrimSpace(Slice s) {
  // Indices are size_t so they cover the full range of s.size().
  size_t start = 0;
  // isspace() is only defined for values representable as unsigned char (or
  // EOF), so every byte is converted before classification; a plain char
  // with the high bit set would otherwise be passed as a negative int.
  while (start < s.size() && isspace(static_cast<unsigned char>(s[start]))) {
    start++;
  }
  size_t limit = s.size();
  while (limit > start && isspace(static_cast<unsigned char>(s[limit - 1]))) {
    limit--;
  }
  return Slice(s.data() + start, limit - start);
}

257 258 259 260 261 262 263 264 265 266
// Appends "msg" to "*str", inserting a single space first when "*str"
// already holds text.  An empty "msg" leaves "*str" untouched.
static void AppendWithSpace(std::string* str, Slice msg) {
  if (msg.empty()) {
    return;
  }
  const bool needs_separator = !str->empty();
  if (needs_separator) {
    *str += ' ';
  }
  str->append(msg.data(), msg.size());
}

class Stats {
 private:
267
  int id_;
268 269 270
  double start_;
  double finish_;
  double seconds_;
271
  long done_;
272
  long last_report_done_;
273 274 275
  int next_report_;
  int64_t bytes_;
  double last_op_finish_;
276
  double last_report_finish_;
277 278 279 280
  Histogram hist_;
  std::string message_;

 public:
281
  Stats() { Start(-1); }
282

283 284 285
  void Start(int id) {
    id_ = id;
    next_report_ = FLAGS_stats_interval ? FLAGS_stats_interval : 100;
286 287 288
    last_op_finish_ = start_;
    hist_.Clear();
    done_ = 0;
289
    last_report_done_ = 0;
290 291
    bytes_ = 0;
    seconds_ = 0;
292
    start_ = FLAGS_env->NowMicros();
293
    finish_ = start_;
294
    last_report_finish_ = start_;
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
    message_.clear();
  }

  void Merge(const Stats& other) {
    hist_.Merge(other.hist_);
    done_ += other.done_;
    bytes_ += other.bytes_;
    seconds_ += other.seconds_;
    if (other.start_ < start_) start_ = other.start_;
    if (other.finish_ > finish_) finish_ = other.finish_;

    // Just keep the messages from one thread
    if (message_.empty()) message_ = other.message_;
  }

  void Stop() {
311
    finish_ = FLAGS_env->NowMicros();
312 313 314 315 316 317 318
    seconds_ = (finish_ - start_) * 1e-6;
  }

  void AddMessage(Slice msg) {
    AppendWithSpace(&message_, msg);
  }

319 320
  void SetId(int id) { id_ = id; }

M
Mark Callaghan 已提交
321
  void FinishedSingleOp(DB* db) {
322
    if (FLAGS_histogram) {
323
      double now = FLAGS_env->NowMicros();
324 325
      double micros = now - last_op_finish_;
      hist_.Add(micros);
326
      if (micros > 20000 && !FLAGS_stats_interval) {
327 328 329 330 331 332 333 334
        fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
        fflush(stderr);
      }
      last_op_finish_ = now;
    }

    done_++;
    if (done_ >= next_report_) {
335 336 337 338 339 340 341 342 343 344 345 346 347
      if (!FLAGS_stats_interval) {
        if      (next_report_ < 1000)   next_report_ += 100;
        else if (next_report_ < 5000)   next_report_ += 500;
        else if (next_report_ < 10000)  next_report_ += 1000;
        else if (next_report_ < 50000)  next_report_ += 5000;
        else if (next_report_ < 100000) next_report_ += 10000;
        else if (next_report_ < 500000) next_report_ += 50000;
        else                            next_report_ += 100000;
        fprintf(stderr, "... finished %ld ops%30s\r", done_, "");
        fflush(stderr);
      } else {
        double now = FLAGS_env->NowMicros();
        fprintf(stderr,
M
Mark Callaghan 已提交
348
                "... thread %d: (%ld,%ld) ops and (%.1f,%.1f) ops/second in (%.6f,%.6f) seconds\n",
349
                id_,
M
Mark Callaghan 已提交
350
                done_ - last_report_done_, done_,
351
                (done_ - last_report_done_) /
M
Mark Callaghan 已提交
352 353 354 355
                ((now - last_report_finish_) / 1000000.0),
                done_ / ((now - start_) / 1000000.0),
                (now - last_report_finish_) / 1000000.0,
                (now - start_) / 1000000.0);
M
Mark Callaghan 已提交
356

357 358 359 360 361
        if (FLAGS_stats_per_interval) {
          std::string stats;
          if (db && db->GetProperty("leveldb.stats", &stats))
            fprintf(stderr, stats.c_str());
        }
M
Mark Callaghan 已提交
362

363 364 365 366 367
        fflush(stderr);
        next_report_ += FLAGS_stats_interval;
        last_report_finish_ = now;
        last_report_done_ = done_;
      }
368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
    }
  }

  void AddBytes(int64_t n) {
    bytes_ += n;
  }

  void Report(const Slice& name) {
    // Pretend at least one op was done in case we are running a benchmark
    // that does not call FinishedSingleOp().
    if (done_ < 1) done_ = 1;

    std::string extra;
    if (bytes_ > 0) {
      // Rate is computed on actual elapsed time, not the sum of per-thread
      // elapsed times.
      double elapsed = (finish_ - start_) * 1e-6;
      char rate[100];
      snprintf(rate, sizeof(rate), "%6.1f MB/s",
               (bytes_ / 1048576.0) / elapsed);
      extra = rate;
    }
    AppendWithSpace(&extra, message_);
391 392
    double elapsed = (finish_ - start_) * 1e-6;
    double throughput = (double)done_/elapsed;
393

D
Dhruba Borthakur 已提交
394
    fprintf(stdout, "%-12s : %11.3f micros/op %ld ops/sec;%s%s\n",
395 396
            name.ToString().c_str(),
            seconds_ * 1e6 / done_,
D
Dhruba Borthakur 已提交
397
            (long)throughput,
398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
            (extra.empty() ? "" : " "),
            extra.c_str());
    if (FLAGS_histogram) {
      fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
    }
    fflush(stdout);
  }
};

// State shared by all concurrent executions of the same benchmark.
struct SharedState {
  port::Mutex mu;    // Guards every field below
  port::CondVar cv;  // Constructed on mu; signalled on each state change
  int total;         // Number of threads taking part in the benchmark

  // Each thread goes through the following states:
  //    (1) initializing
  //    (2) waiting for others to be initialized
  //    (3) running
  //    (4) done

  long num_initialized;  // Threads that have reached state (2)
  long num_done;         // Threads that have reached state (4)
  bool start;            // Set once all threads may move on to state (3)

  // All counters start at zero and "start" at false so that a freshly
  // constructed object never exposes indeterminate values; callers may still
  // assign the fields afterwards.
  SharedState()
      : cv(&mu),
        total(0),
        num_initialized(0),
        num_done(0),
        start(false) { }
};

// Per-thread state for concurrent executions of the same benchmark.
struct ThreadState {
  int tid;             // 0..n-1 when running in n threads
  Random rand;         // Has different seeds for different threads
  Stats stats;
431
  SharedState* shared;
432 433 434 435 436 437 438

  ThreadState(int index)
      : tid(index),
        rand(1000 + index) {
  }
};

J
jorlow@chromium.org 已提交
439 440 441
class Benchmark {
 private:
  Cache* cache_;
S
Sanjay Ghemawat 已提交
442
  const FilterPolicy* filter_policy_;
J
jorlow@chromium.org 已提交
443
  DB* db_;
444
  long num_;
445 446 447
  int value_size_;
  int entries_per_batch_;
  WriteOptions write_options_;
448
  long reads_;
449
  long writes_;
450
  long readwrites_;
J
jorlow@chromium.org 已提交
451 452
  int heap_counter_;

453 454 455 456 457 458 459
  void PrintHeader() {
    const int kKeySize = 16;
    PrintEnvironment();
    fprintf(stdout, "Keys:       %d bytes each\n", kKeySize);
    fprintf(stdout, "Values:     %d bytes each (%d bytes after compression)\n",
            FLAGS_value_size,
            static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5));
460
    fprintf(stdout, "Entries:    %ld\n", num_);
461
    fprintf(stdout, "RawSize:    %.1f MB (estimated)\n",
462 463
            ((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_)
             / 1048576.0));
464 465 466
    fprintf(stdout, "FileSize:   %.1f MB (estimated)\n",
            (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_)
             / 1048576.0));
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482

    switch (FLAGS_compression_type) {
      case leveldb::kNoCompression:
        fprintf(stdout, "Compression: none\n");
        break;
      case leveldb::kSnappyCompression:
        fprintf(stdout, "Compression: snappy\n");
        break;
      case leveldb::kZlibCompression:
        fprintf(stdout, "Compression: zlib\n");
        break;
      case leveldb::kBZip2Compression:
        fprintf(stdout, "Compression: bzip2\n");
        break;
    }

483 484 485 486 487 488 489 490 491 492 493 494 495 496
    PrintWarnings();
    fprintf(stdout, "------------------------------------------------\n");
  }

  void PrintWarnings() {
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
    fprintf(stdout,
            "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"
            );
#endif
#ifndef NDEBUG
    fprintf(stdout,
            "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
#endif
497

498 499 500 501 502 503 504 505 506 507 508 509 510
    if (FLAGS_compression_type != leveldb::kNoCompression) {
      // The test string should not be too small.
      const int len = FLAGS_block_size;
      char* text = (char*) malloc(len+1);
      bool result = true;
      const char* name = NULL;
      std::string compressed;

      memset(text, (int) 'y', len);
      text[len] = '\0';

      switch (FLAGS_compression_type) {
        case kSnappyCompression:
511 512
          result = port::Snappy_Compress(Options().compression_opts, text,
                                         strlen(text), &compressed);
513 514 515
          name = "Snappy";
          break;
        case kZlibCompression:
516 517
          result = port::Zlib_Compress(Options().compression_opts, text,
                                       strlen(text), &compressed);
518 519 520
          name = "Zlib";
          break;
        case kBZip2Compression:
521 522
          result = port::BZip2_Compress(Options().compression_opts, text,
                                        strlen(text), &compressed);
523 524
          name = "BZip2";
          break;
525 526 527
        case kNoCompression:
          assert(false); // cannot happen
          break;
528 529 530 531 532 533 534 535 536
      }

      if (!result) {
        fprintf(stdout, "WARNING: %s compression is not enabled\n", name);
      } else if (name && compressed.size() >= strlen(text)) {
        fprintf(stdout, "WARNING: %s compression is not effective\n", name);
      }

      free(text);
537
    }
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574
  }

  // Prints the library version to stderr and, on Linux, the current date
  // plus the CPU count, CPU model and CPU cache size read from
  // /proc/cpuinfo.  Nothing CPU-related is printed when that file cannot be
  // opened.
  void PrintEnvironment() {
    fprintf(stderr, "LevelDB:    version %d.%d\n",
            kMajorVersion, kMinorVersion);

#if defined(__linux)
    time_t now = time(NULL);
    fprintf(stderr, "Date:       %s", ctime(&now));  // ctime() adds newline

    FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
    if (cpuinfo != NULL) {
      char line[1000];
      int num_cpus = 0;
      std::string cpu_type;
      std::string cache_size;
      while (fgets(line, sizeof(line), cpuinfo) != NULL) {
        const char* sep = strchr(line, ':');
        if (sep == NULL) {
          continue;
        }
        // The key is everything before ':'; TrimSpace() removes the padding
        // that precedes the colon.  Using "sep - line" (rather than one
        // less) keeps the length non-negative when a line starts with ':'
        // and keeps the last key character when no padding is present.
        Slice key = TrimSpace(Slice(line, sep - line));
        Slice val = TrimSpace(Slice(sep + 1));
        if (key == "model name") {
          // One "model name" entry is counted per CPU.
          ++num_cpus;
          cpu_type = val.ToString();
        } else if (key == "cache size") {
          cache_size = val.ToString();
        }
      }
      fclose(cpuinfo);
      fprintf(stderr, "CPU:        %d * %s\n", num_cpus, cpu_type.c_str());
      fprintf(stderr, "CPUCache:   %s\n", cache_size.c_str());
    }
#endif
  }

575 576
  void PrintStatistics() {
    if (FLAGS_statistics) {
A
Abhishek Kona 已提交
577
      fprintf(stdout, "File opened:%ld closed:%ld errors:%ld\n"
578 579 580 581 582 583 584
          "Block Cache Hit Count:%ld Block Cache Miss Count:%ld\n"
          "Bloom Filter Useful: %ld \n"
          "Compaction key_drop_newer_entry: %ld key_drop_obsolete: %ld "
          "Compaction key_drop_user: %ld",
          dbstats->getNumFileOpens(),
          dbstats->getNumFileCloses(),
          dbstats->getNumFileErrors(),
A
Abhishek Kona 已提交
585
          dbstats->getTickerCount(BLOCK_CACHE_HIT),
586 587 588 589 590
          dbstats->getTickerCount(BLOCK_CACHE_MISS),
          dbstats->getTickerCount(BLOOM_FILTER_USEFUL),
          dbstats->getTickerCount(COMPACTION_KEY_DROP_NEWER_ENTRY),
          dbstats->getTickerCount(COMPACTION_KEY_DROP_OBSOLETE),
          dbstats->getTickerCount(COMPACTION_KEY_DROP_USER));
591 592 593
    }
  }

J
jorlow@chromium.org 已提交
594
 public:
595
  Benchmark()
596 597 598
  : cache_(FLAGS_cache_size >= 0 ?
           (FLAGS_cache_numshardbits >= 1 ?
            NewLRUCache(FLAGS_cache_size, FLAGS_cache_numshardbits) :
599
            NewLRUCache(FLAGS_cache_size)) : NULL),
S
Sanjay Ghemawat 已提交
600 601 602
    filter_policy_(FLAGS_bloom_bits >= 0
                   ? NewBloomFilterPolicy(FLAGS_bloom_bits)
                   : NULL),
603 604
    db_(NULL),
    num_(FLAGS_num),
605 606
    value_size_(FLAGS_value_size),
    entries_per_batch_(1),
607
    reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
608
    writes_(FLAGS_writes < 0 ? FLAGS_num : FLAGS_writes),
609
    readwrites_((FLAGS_writes < 0  && FLAGS_reads < 0)? FLAGS_num :
610
                ((FLAGS_writes > FLAGS_reads) ? FLAGS_writes : FLAGS_reads)
611
               ),
612
    heap_counter_(0) {
J
jorlow@chromium.org 已提交
613
    std::vector<std::string> files;
614
    FLAGS_env->GetChildren(FLAGS_db, &files);
615
    for (unsigned int i = 0; i < files.size(); i++) {
J
jorlow@chromium.org 已提交
616
      if (Slice(files[i]).starts_with("heap-")) {
617
        FLAGS_env->DeleteFile(std::string(FLAGS_db) + "/" + files[i]);
J
jorlow@chromium.org 已提交
618 619
      }
    }
620
    if (!FLAGS_use_existing_db) {
621
      DestroyDB(FLAGS_db, Options());
622
    }
J
jorlow@chromium.org 已提交
623 624 625 626 627
  }

  // Releases what the benchmark owns, in this order: the open database
  // handle first, then the block cache and the filter policy created by the
  // constructor (either of which may be NULL).
  ~Benchmark() {
    delete db_;
    delete cache_;
    delete filter_policy_;
  }

  void Run() {
632 633
    PrintHeader();
    Open();
J
jorlow@chromium.org 已提交
634 635 636 637 638 639 640 641 642 643 644 645 646

    const char* benchmarks = FLAGS_benchmarks;
    while (benchmarks != NULL) {
      const char* sep = strchr(benchmarks, ',');
      Slice name;
      if (sep == NULL) {
        name = benchmarks;
        benchmarks = NULL;
      } else {
        name = Slice(benchmarks, sep - benchmarks);
        benchmarks = sep + 1;
      }

647 648
      // Reset parameters that may be overriddden bwlow
      num_ = FLAGS_num;
649
      reads_ = (FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads);
650
      writes_ = (FLAGS_writes < 0 ? FLAGS_num : FLAGS_writes);
651 652 653
      value_size_ = FLAGS_value_size;
      entries_per_batch_ = 1;
      write_options_ = WriteOptions();
654 655 656
      if (FLAGS_sync) {
        write_options_.sync = true;
      }
657

H
heyongqiang 已提交
658 659
      write_options_.disableWAL = FLAGS_disable_wal;

660 661
      void (Benchmark::*method)(ThreadState*) = NULL;
      bool fresh_db = false;
662
      int num_threads = FLAGS_threads;
663 664

      if (name == Slice("fillseq")) {
665 666
        fresh_db = true;
        method = &Benchmark::WriteSeq;
667
      } else if (name == Slice("fillbatch")) {
668 669 670
        fresh_db = true;
        entries_per_batch_ = 1000;
        method = &Benchmark::WriteSeq;
671
      } else if (name == Slice("fillrandom")) {
672 673
        fresh_db = true;
        method = &Benchmark::WriteRandom;
674
      } else if (name == Slice("overwrite")) {
675 676
        fresh_db = false;
        method = &Benchmark::WriteRandom;
677
      } else if (name == Slice("fillsync")) {
678 679 680 681
        fresh_db = true;
        num_ /= 1000;
        write_options_.sync = true;
        method = &Benchmark::WriteRandom;
682
      } else if (name == Slice("fill100K")) {
683 684 685 686
        fresh_db = true;
        num_ /= 1000;
        value_size_ = 100 * 1000;
        method = &Benchmark::WriteRandom;
J
jorlow@chromium.org 已提交
687
      } else if (name == Slice("readseq")) {
688
        method = &Benchmark::ReadSequential;
J
jorlow@chromium.org 已提交
689
      } else if (name == Slice("readreverse")) {
690
        method = &Benchmark::ReadReverse;
J
jorlow@chromium.org 已提交
691
      } else if (name == Slice("readrandom")) {
692
        method = &Benchmark::ReadRandom;
S
Sanjay Ghemawat 已提交
693 694 695 696
      } else if (name == Slice("readmissing")) {
        method = &Benchmark::ReadMissing;
      } else if (name == Slice("seekrandom")) {
        method = &Benchmark::SeekRandom;
697
      } else if (name == Slice("readhot")) {
698
        method = &Benchmark::ReadHot;
699
      } else if (name == Slice("readrandomsmall")) {
700
        reads_ /= 1000;
701
        method = &Benchmark::ReadRandom;
S
Sanjay Ghemawat 已提交
702 703 704 705
      } else if (name == Slice("deleteseq")) {
        method = &Benchmark::DeleteSeq;
      } else if (name == Slice("deleterandom")) {
        method = &Benchmark::DeleteRandom;
706 707 708
      } else if (name == Slice("readwhilewriting")) {
        num_threads++;  // Add extra thread for writing
        method = &Benchmark::ReadWhileWriting;
709 710
      } else if (name == Slice("readrandomwriterandom")) {
        method = &Benchmark::ReadRandomWriteRandom;
J
jorlow@chromium.org 已提交
711
      } else if (name == Slice("compact")) {
712
        method = &Benchmark::Compact;
J
jorlow@chromium.org 已提交
713
      } else if (name == Slice("crc32c")) {
714
        method = &Benchmark::Crc32c;
715
      } else if (name == Slice("acquireload")) {
716
        method = &Benchmark::AcquireLoad;
717
      } else if (name == Slice("snappycomp")) {
718
        method = &Benchmark::SnappyCompress;
719
      } else if (name == Slice("snappyuncomp")) {
720
        method = &Benchmark::SnappyUncompress;
J
jorlow@chromium.org 已提交
721 722
      } else if (name == Slice("heapprofile")) {
        HeapProfile();
723
      } else if (name == Slice("stats")) {
S
Sanjay Ghemawat 已提交
724 725 726
        PrintStats("leveldb.stats");
      } else if (name == Slice("sstables")) {
        PrintStats("leveldb.sstables");
J
jorlow@chromium.org 已提交
727
      } else {
728 729 730 731
        if (name != Slice()) {  // No error message for empty name
          fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
        }
      }
732 733 734 735 736 737 738 739 740 741 742 743 744 745 746

      if (fresh_db) {
        if (FLAGS_use_existing_db) {
          fprintf(stdout, "%-12s : skipped (--use_existing_db is true)\n",
                  name.ToString().c_str());
          method = NULL;
        } else {
          delete db_;
          db_ = NULL;
          DestroyDB(FLAGS_db, Options());
          Open();
        }
      }

      if (method != NULL) {
747
        RunBenchmark(num_threads, name, method);
J
jorlow@chromium.org 已提交
748 749
      }
    }
750
    PrintStatistics();
J
jorlow@chromium.org 已提交
751 752
  }

753
 private:
754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775
  // Argument bundle passed (as void*) to ThreadBody for one benchmark thread.
  struct ThreadArg {
    Benchmark* bm;        // Object on which "method" is invoked
    SharedState* shared;  // Start/finish coordination shared by all threads
    ThreadState* thread;  // This thread's own state; handed to "method"
    void (Benchmark::*method)(ThreadState*);  // Benchmark body to execute
  };

  static void ThreadBody(void* v) {
    ThreadArg* arg = reinterpret_cast<ThreadArg*>(v);
    SharedState* shared = arg->shared;
    ThreadState* thread = arg->thread;
    {
      MutexLock l(&shared->mu);
      shared->num_initialized++;
      if (shared->num_initialized >= shared->total) {
        shared->cv.SignalAll();
      }
      while (!shared->start) {
        shared->cv.Wait();
      }
    }

776
    thread->stats.Start(thread->tid);
777 778 779 780 781 782 783 784 785 786 787 788
    (arg->bm->*(arg->method))(thread);
    thread->stats.Stop();

    {
      MutexLock l(&shared->mu);
      shared->num_done++;
      if (shared->num_done >= shared->total) {
        shared->cv.SignalAll();
      }
    }
  }

789 790
  // Runs "method" concurrently on "n" threads, waits for all of them to
  // finish, merges their statistics and reports the result under "name".
  // A non-positive "n" runs nothing and prints a notice to stderr.
  void RunBenchmark(int n, Slice name,
                    void (Benchmark::*method)(ThreadState*)) {
    if (n < 1) {
      // The merged result is reported through arg[0], which does not exist
      // for an empty thread set; bail out instead of indexing past the
      // array.
      fprintf(stderr, "%s : skipped (thread count %d is not positive)\n",
              name.ToString().c_str(), n);
      return;
    }

    SharedState shared;
    shared.total = n;
    shared.num_initialized = 0;
    shared.num_done = 0;
    shared.start = false;

    ThreadArg* arg = new ThreadArg[n];
    for (int i = 0; i < n; i++) {
      arg[i].bm = this;
      arg[i].method = method;
      arg[i].shared = &shared;
      arg[i].thread = new ThreadState(i);
      arg[i].thread->shared = &shared;
      FLAGS_env->StartThread(ThreadBody, &arg[i]);
    }

    shared.mu.Lock();
    // Wait until every thread has checked in, so that all of them start
    // measuring at the same moment.
    while (shared.num_initialized < n) {
      shared.cv.Wait();
    }

    shared.start = true;
    shared.cv.SignalAll();
    while (shared.num_done < n) {
      shared.cv.Wait();
    }
    shared.mu.Unlock();

    // Thread 0's stats object doubles as the accumulator for the report.
    for (int i = 1; i < n; i++) {
      arg[0].thread->stats.Merge(arg[i].thread->stats);
    }
    arg[0].thread->stats.Report(name);

    for (int i = 0; i < n; i++) {
      delete arg[i].thread;
    }
    delete[] arg;
  }

  // Benchmarks crc32c::Value() by checksumming the same 4K buffer until
  // about 500MB of data have been processed; each checksum is one op.
  void Crc32c(ThreadState* thread) {
    const int kBlockSize = 4096;
    const int64_t kTotalBytes = 500 * 1048576;
    const std::string block(kBlockSize, 'x');

    uint32_t crc = 0;
    int64_t checksummed = 0;
    for (; checksummed < kTotalBytes; checksummed += kBlockSize) {
      crc = crc32c::Value(block.data(), kBlockSize);
      thread->stats.FinishedSingleOp(NULL);
    }
    // Print so result is not dead
    fprintf(stderr, "... crc=0x%x\r", static_cast<unsigned int>(crc));

    thread->stats.AddBytes(checksummed);
    thread->stats.AddMessage("(4K per op)");
  }

849
  // Benchmarks port::AtomicPointer::Acquire_Load(): 100000 ops, each of
  // which performs 1000 loads of the same pointer.
  void AcquireLoad(ThreadState* thread) {
    int target;
    port::AtomicPointer ap(&target);
    void* loaded = NULL;

    thread->stats.AddMessage("(each op is 1000 loads)");
    for (int op = 0; op < 100000; op++) {
      for (int k = 0; k < 1000; k++) {
        loaded = ap.Acquire_Load();
      }
      thread->stats.FinishedSingleOp(NULL);
    }
    // Consume the loaded value so the variable does not count as unused.
    if (loaded == NULL) exit(1);
  }

865 866 867
  // Benchmarks Snappy compression: one block-sized input is compressed over
  // and over until 1GB of input has been consumed (or compression fails).
  // Reports the output size as a percentage of the input size.
  void SnappyCompress(ThreadState* thread) {
    // Built once: constructing a fresh Options object for every iteration
    // would add work to the loop that is being timed.
    Options options;
    RandomGenerator gen;
    Slice input = gen.Generate(options.block_size);
    int64_t bytes = 0;
    int64_t produced = 0;
    bool ok = true;
    std::string compressed;
    while (ok && bytes < 1024 * 1048576) {  // Compress 1G
      ok = port::Snappy_Compress(options.compression_opts, input.data(),
                                 input.size(), &compressed);
      produced += compressed.size();
      bytes += input.size();
      thread->stats.FinishedSingleOp(NULL);
    }

    if (!ok) {
      thread->stats.AddMessage("(snappy failure)");
    } else {
      char buf[100];
      snprintf(buf, sizeof(buf), "(output: %.1f%%)",
               (produced * 100.0) / bytes);
      thread->stats.AddMessage(buf);
      thread->stats.AddBytes(bytes);
    }
  }

891 892 893
  // Benchmarks Snappy decompression: compresses one generated block once,
  // then uncompresses it repeatedly until about 1GB of output has been
  // produced.
  void SnappyUncompress(ThreadState* thread) {
    const int64_t kTargetBytes = 1024 * 1048576;  // Uncompress 1G
    RandomGenerator gen;
    Slice input = gen.Generate(Options().block_size);
    std::string compressed;
    bool ok = port::Snappy_Compress(Options().compression_opts, input.data(),
                                    input.size(), &compressed);

    // Scratch buffer sized to hold the original (uncompressed) block.
    char* scratch = new char[input.size()];
    int64_t produced = 0;
    while (ok && produced < kTargetBytes) {
      ok = port::Snappy_Uncompress(compressed.data(), compressed.size(),
                                   scratch);
      produced += input.size();
      thread->stats.FinishedSingleOp(NULL);
    }
    delete[] scratch;

    if (ok) {
      thread->stats.AddBytes(produced);
    } else {
      thread->stats.AddMessage("(snappy failure)");
    }
  }

914 915 916
  void Open() {
    assert(db_ == NULL);
    Options options;
917
    options.create_if_missing = !FLAGS_use_existing_db;
918
    options.block_cache = cache_;
919 920 921
    if (cache_ == NULL) {
      options.no_block_cache = true;
    }
922
    options.write_buffer_size = FLAGS_write_buffer_size;
923
    options.block_size = FLAGS_block_size;
S
Sanjay Ghemawat 已提交
924
    options.filter_policy = filter_policy_;
925 926
    options.max_open_files = FLAGS_open_files;
    options.statistics = dbstats;
927
    options.env = FLAGS_env;
H
heyongqiang 已提交
928
    options.disableDataSync = FLAGS_disable_data_sync;
929
    options.use_fsync = FLAGS_use_fsync;
930
    options.num_levels = FLAGS_num_levels;
H
heyongqiang 已提交
931 932 933 934 935
    options.target_file_size_base = FLAGS_target_file_size_base;
    options.target_file_size_multiplier = FLAGS_target_file_size_multiplier;
    options.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base;
    options.max_bytes_for_level_multiplier =
        FLAGS_max_bytes_for_level_multiplier;
H
heyongqiang 已提交
936
    options.level0_stop_writes_trigger = FLAGS_level0_stop_writes_trigger;
M
Mark Callaghan 已提交
937 938
    options.level0_file_num_compaction_trigger =
        FLAGS_level0_file_num_compaction_trigger;
H
heyongqiang 已提交
939 940
    options.level0_slowdown_writes_trigger =
      FLAGS_level0_slowdown_writes_trigger;
941
    options.compression = FLAGS_compression_type;
942 943 944
    if (FLAGS_min_level_to_compress >= 0) {
      assert(FLAGS_min_level_to_compress <= FLAGS_num_levels);
      options.compression_per_level = new CompressionType[FLAGS_num_levels];
945
      for (int i = 0; i < FLAGS_min_level_to_compress; i++) {
946 947
        options.compression_per_level[i] = kNoCompression;
      }
948
      for (unsigned int i = FLAGS_min_level_to_compress;
949 950 951 952
           i < FLAGS_num_levels; i++) {
        options.compression_per_level[i] = FLAGS_compression_type;
      }
    }
953
    options.disable_seek_compaction = FLAGS_disable_seek_compaction;
954 955
    options.delete_obsolete_files_period_micros =
      FLAGS_delete_obsolete_files_period_micros;
956
    options.rate_limit = FLAGS_rate_limit;
957
    options.table_cache_numshardbits = FLAGS_table_cache_numshardbits;
H
heyongqiang 已提交
958 959 960 961 962 963
    Status s;
    if(FLAGS_read_only) {
      s = DB::OpenForReadOnly(options, FLAGS_db, &db_);
    } else {
      s = DB::Open(options, FLAGS_db, &db_);
    }
964 965 966 967
    if (!s.ok()) {
      fprintf(stderr, "open error: %s\n", s.ToString().c_str());
      exit(1);
    }
968 969 970
    if (FLAGS_min_level_to_compress >= 0) {
      delete options.compression_per_level;
    }
971 972
  }

973 974 975
  // Write benchmark using sequential key order.
  void WriteSeq(ThreadState* thread) {
    const bool sequential = true;
    DoWrite(thread, sequential);
  }
976

977 978 979 980 981 982
  // Write benchmark using random key order.
  void WriteRandom(ThreadState* thread) {
    const bool sequential = false;
    DoWrite(thread, sequential);
  }

  // Writes values of value_size_ bytes in batches of entries_per_batch_ until
  // the batch start index reaches writes_. Keys are the 16-digit zero-padded
  // decimal form of either the running index (seq == true) or a random number
  // modulo FLAGS_num. Exits the process if a batch write fails.
  void DoWrite(ThreadState* thread, bool seq) {
    if (num_ != FLAGS_num) {
      char note[100];
      snprintf(note, sizeof(note), "(%ld ops)", num_);
      thread->stats.AddMessage(note);
    }

    RandomGenerator gen;
    WriteBatch batch;
    int64_t written = 0;
    for (int base = 0; base < writes_; base += entries_per_batch_) {
      batch.Clear();
      for (int offset = 0; offset < entries_per_batch_; offset++) {
        const int k = seq ? base + offset : (thread->rand.Next() % FLAGS_num);
        char key[100];
        snprintf(key, sizeof(key), "%016d", k);
        batch.Put(key, gen.Generate(value_size_));
        written += value_size_ + strlen(key);
        thread->stats.FinishedSingleOp(db_);
      }

      Status s = db_->Write(write_options_, &batch);
      if (!s.ok()) {
        fprintf(stderr, "put error: %s\n", s.ToString().c_str());
        exit(1);
      }
    }
    thread->stats.AddBytes(written);
  }

1011
  // Scans forward from the first entry, visiting at most reads_ entries, and
  // records the total key + value bytes seen.
  void ReadSequential(ThreadState* thread) {
    Iterator* it = db_->NewIterator(ReadOptions(FLAGS_verify_checksum, true));
    int64_t scanned = 0;
    long visited = 0;
    it->SeekToFirst();
    while (visited < reads_ && it->Valid()) {
      scanned += it->key().size() + it->value().size();
      thread->stats.FinishedSingleOp(db_);
      ++visited;
      it->Next();
    }
    delete it;
    thread->stats.AddBytes(scanned);
  }

1024
  // Scans backward from the last entry, visiting at most reads_ entries, and
  // records the total key + value bytes seen.
  void ReadReverse(ThreadState* thread) {
    Iterator* it = db_->NewIterator(ReadOptions(FLAGS_verify_checksum, true));
    int64_t scanned = 0;
    long visited = 0;
    it->SeekToLast();
    while (visited < reads_ && it->Valid()) {
      scanned += it->key().size() + it->value().size();
      thread->stats.FinishedSingleOp(db_);
      ++visited;
      it->Prev();
    }
    delete it;
    thread->stats.AddBytes(scanned);
  }

1037
  // Issues reads_ Gets for random keys in [0, FLAGS_num) and reports how many
  // of them succeeded.
  void ReadRandom(ThreadState* thread) {
    ReadOptions read_opts(FLAGS_verify_checksum, true);
    std::string result;
    long hits = 0;
    for (long n = 0; n < reads_; n++) {
      const int k = thread->rand.Next() % FLAGS_num;
      char key[100];
      snprintf(key, sizeof(key), "%016d", k);
      const bool ok = db_->Get(read_opts, key, &result).ok();
      if (ok) {
        hits++;
      }
      thread->stats.FinishedSingleOp(db_);
    }

    char summary[100];
    snprintf(summary, sizeof(summary), "(%ld of %ld found)", hits, num_);
    thread->stats.AddMessage(summary);
  }

  // Issues reads_ Gets for keys that carry a trailing '.', a form the write
  // benchmarks in this file never produce. The lookup result is ignored.
  void ReadMissing(ThreadState* thread) {
    ReadOptions read_opts(FLAGS_verify_checksum, true);
    std::string result;
    long remaining = reads_;
    while (remaining > 0) {
      char key[100];
      const int k = thread->rand.Next() % FLAGS_num;
      snprintf(key, sizeof(key), "%016d.", k);
      db_->Get(read_opts, key, &result);
      thread->stats.FinishedSingleOp(db_);
      --remaining;
    }
  }

1067
  // Issues reads_ Gets restricted to the lowest 1% of the key space
  // (FLAGS_num / 100, rounded up). The lookup result is ignored.
  void ReadHot(ThreadState* thread) {
    ReadOptions read_opts(FLAGS_verify_checksum, true);
    std::string result;
    const long hot_keys = (FLAGS_num + 99) / 100;
    long remaining = reads_;
    while (remaining > 0) {
      char key[100];
      const int k = thread->rand.Next() % hot_keys;
      snprintf(key, sizeof(key), "%016d", k);
      db_->Get(read_opts, key, &result);
      thread->stats.FinishedSingleOp(db_);
      --remaining;
    }
  }

S
Sanjay Ghemawat 已提交
1080
  // Performs reads_ random seeks, each on a freshly created iterator, and
  // reports how many landed exactly on the requested key.
  void SeekRandom(ThreadState* thread) {
    ReadOptions read_opts(FLAGS_verify_checksum, true);
    std::string value;
    long hits = 0;
    for (long n = 0; n < reads_; n++) {
      Iterator* it = db_->NewIterator(read_opts);
      char key[100];
      const int k = thread->rand.Next() % FLAGS_num;
      snprintf(key, sizeof(key), "%016d", k);
      it->Seek(key);
      const bool exact = it->Valid() && it->key() == key;
      if (exact) {
        hits++;
      }
      delete it;
      thread->stats.FinishedSingleOp(db_);
    }

    char summary[100];
    snprintf(summary, sizeof(summary), "(%ld of %ld found)", hits, num_);
    thread->stats.AddMessage(summary);
  }

  // Deletes keys in batches of entries_per_batch_ until the batch start index
  // reaches num_. Keys are the 16-digit zero-padded decimal form of either
  // the running index (seq == true) or a random number modulo FLAGS_num.
  // Exits the process if a batch write fails.
  //
  // No RandomGenerator is constructed here: deletes carry no values, so the
  // generator the previous version built was never used.
  void DoDelete(ThreadState* thread, bool seq) {
    WriteBatch batch;
    Status s;
    for (int i = 0; i < num_; i += entries_per_batch_) {
      batch.Clear();
      for (int j = 0; j < entries_per_batch_; j++) {
        const int k = seq ? i + j : (thread->rand.Next() % FLAGS_num);
        char key[100];
        snprintf(key, sizeof(key), "%016d", k);
        batch.Delete(key);
        thread->stats.FinishedSingleOp(db_);
      }
      s = db_->Write(write_options_, &batch);
      if (!s.ok()) {
        fprintf(stderr, "del error: %s\n", s.ToString().c_str());
        exit(1);
      }
    }
  }

  // Delete benchmark using sequential key order.
  void DeleteSeq(ThreadState* thread) {
    const bool sequential = true;
    DoDelete(thread, sequential);
  }

  // Delete benchmark using random key order.
  void DeleteRandom(ThreadState* thread) {
    const bool sequential = false;
    DoDelete(thread, sequential);
  }

1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153
  // Threads with tid > 0 run ReadRandom. Thread 0 instead keeps issuing
  // random Puts until every other initialized thread has finished, then
  // restarts its own stats so that this writing is not counted.
  void ReadWhileWriting(ThreadState* thread) {
    if (thread->tid > 0) {
      ReadRandom(thread);
      return;
    }

    // Special thread that keeps writing until other threads are done.
    RandomGenerator gen;
    for (;;) {
      bool others_finished;
      {
        MutexLock guard(&thread->shared->mu);
        others_finished =
            thread->shared->num_done + 1 >= thread->shared->num_initialized;
      }
      if (others_finished) {
        break;
      }

      const int k = thread->rand.Next() % FLAGS_num;
      char key[100];
      snprintf(key, sizeof(key), "%016d", k);
      Status s = db_->Put(write_options_, key, gen.Generate(value_size_));
      if (!s.ok()) {
        fprintf(stderr, "put error: %s\n", s.ToString().c_str());
        exit(1);
      }
    }

    // Do not count any of the preceding work/delay in stats.
    thread->stats.Start(thread->tid);
  }

1158 1159
  //
  // This is different from ReadWhileWriting because it does not use
1160
  // an extra thread.
1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171
  //
  void ReadRandomWriteRandom(ThreadState* thread) {
    ReadOptions read_opts(FLAGS_verify_checksum, true);
    RandomGenerator gen;
    std::string result;
    long found = 0;
    int gets_left = 0;
    int puts_left = 0;
    long reads_done = 0;
    long writes_done = 0;
    // Runs readwrites_ operations in total, one Get or one Put per iteration.
    for (long op = 0; op < readwrites_; op++) {
      char key[100];
      const int k = thread->rand.Next() % FLAGS_num;
      snprintf(key, sizeof(key), "%016d", k);

      if (gets_left == 0 && puts_left == 0) {
        // One batch completed: start the next one with
        // FLAGS_readwritepercent Gets followed by the remaining Puts.
        gets_left = FLAGS_readwritepercent;
        puts_left = 100 - gets_left;
      }

      if (gets_left > 0) {
        // All Gets of a batch are issued first.
        const bool ok = db_->Get(read_opts, key, &result).ok();
        if (ok) {
          found++;
        }
        gets_left--;
        reads_done++;
      } else if (puts_left > 0) {
        // Then the Puts that balance the Gets already issued in this batch.
        Status s = db_->Put(write_options_, key, gen.Generate(value_size_));
        if (!s.ok()) {
          fprintf(stderr, "put error: %s\n", s.ToString().c_str());
          exit(1);
        }
        puts_left--;
        writes_done++;
      }
      thread->stats.FinishedSingleOp(db_);
    }

    char summary[100];
    snprintf(summary, sizeof(summary), "( reads:%ld writes:%ld total:%ld )",
             reads_done, writes_done, readwrites_);
    thread->stats.AddMessage(summary);
  }

1207
  // Calls CompactRange with both bounds NULL. The thread argument is unused.
  void Compact(ThreadState* thread) {
    const Slice* const range_begin = NULL;
    const Slice* const range_end = NULL;
    db_->CompactRange(range_begin, range_end);
  }

S
Sanjay Ghemawat 已提交
1211
  void PrintStats(const char* key) {
1212
    std::string stats;
S
Sanjay Ghemawat 已提交
1213
    if (!db_->GetProperty(key, &stats)) {
1214
      stats = "(failed)";
1215
    }
1216
    fprintf(stdout, "\n%s\n", stats.c_str());
1217 1218
  }

J
jorlow@chromium.org 已提交
1219 1220 1221 1222 1223 1224
  static void WriteToFile(void* arg, const char* buf, int n) {
    reinterpret_cast<WritableFile*>(arg)->Append(Slice(buf, n));
  }

  void HeapProfile() {
    char fname[100];
1225
    snprintf(fname, sizeof(fname), "%s/heap-%04d", FLAGS_db, ++heap_counter_);
J
jorlow@chromium.org 已提交
1226
    WritableFile* file;
1227
    Status s = FLAGS_env->NewWritableFile(fname, &file);
J
jorlow@chromium.org 已提交
1228
    if (!s.ok()) {
1229
      fprintf(stderr, "%s\n", s.ToString().c_str());
J
jorlow@chromium.org 已提交
1230 1231 1232 1233 1234
      return;
    }
    bool ok = port::GetHeapProfile(WriteToFile, file);
    delete file;
    if (!ok) {
1235
      fprintf(stderr, "heap profiling not supported\n");
1236
      FLAGS_env->DeleteFile(fname);
J
jorlow@chromium.org 已提交
1237 1238 1239 1240
    }
  }
};

H
Hans Wennborg 已提交
1241
}  // namespace leveldb
J
jorlow@chromium.org 已提交
1242 1243

int main(int argc, char** argv) {
1244
  FLAGS_write_buffer_size = leveldb::Options().write_buffer_size;
1245
  FLAGS_open_files = leveldb::Options().max_open_files;
1246 1247
  // Compression test code above refers to FLAGS_block_size
  FLAGS_block_size = leveldb::Options().block_size;
H
heyongqiang 已提交
1248
  std::string default_db_path;
1249

J
jorlow@chromium.org 已提交
1250 1251 1252
  for (int i = 1; i < argc; i++) {
    double d;
    int n;
1253
    long l;
J
jorlow@chromium.org 已提交
1254
    char junk;
1255
    char hdfsname[2048];
J
jorlow@chromium.org 已提交
1256 1257 1258 1259 1260 1261 1262
    if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) {
      FLAGS_benchmarks = argv[i] + strlen("--benchmarks=");
    } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) {
      FLAGS_compression_ratio = d;
    } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      FLAGS_histogram = n;
1263 1264 1265
    } else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      FLAGS_use_existing_db = n;
1266 1267
    } else if (sscanf(argv[i], "--num=%ld%c", &l, &junk) == 1) {
      FLAGS_num = l;
1268 1269
    } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) {
      FLAGS_reads = n;
1270 1271
    } else if (sscanf(argv[i], "--threads=%d%c", &n, &junk) == 1) {
      FLAGS_threads = n;
J
jorlow@chromium.org 已提交
1272 1273 1274 1275
    } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) {
      FLAGS_value_size = n;
    } else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) {
      FLAGS_write_buffer_size = n;
1276 1277
    } else if (sscanf(argv[i], "--cache_size=%ld%c", &l, &junk) == 1) {
      FLAGS_cache_size = l;
1278 1279
    } else if (sscanf(argv[i], "--block_size=%d%c", &n, &junk) == 1) {
      FLAGS_block_size = n;
1280
    } else if (sscanf(argv[i], "--cache_numshardbits=%d%c", &n, &junk) == 1) {
1281 1282 1283 1284 1285 1286
      if (n < 20) {
        FLAGS_cache_numshardbits = n;
      } else {
        fprintf(stderr, "The cache cannot be sharded into 2**%d pieces\n", n);
        exit(1);
      }
1287 1288 1289 1290 1291 1292 1293
    } else if (sscanf(argv[i], "--table_cache_numshardbits=%d%c",
		      &n, &junk) == 1) {
      if (n <= 0 || n > 20) {
        fprintf(stderr, "The cache cannot be sharded into 2**%d pieces\n", n);
        exit(1);
      }
      FLAGS_table_cache_numshardbits = n;
S
Sanjay Ghemawat 已提交
1294 1295
    } else if (sscanf(argv[i], "--bloom_bits=%d%c", &n, &junk) == 1) {
      FLAGS_bloom_bits = n;
1296 1297
    } else if (sscanf(argv[i], "--open_files=%d%c", &n, &junk) == 1) {
      FLAGS_open_files = n;
1298 1299
    } else if (strncmp(argv[i], "--db=", 5) == 0) {
      FLAGS_db = argv[i] + 5;
1300 1301 1302
    } else if (sscanf(argv[i], "--verify_checksum=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      FLAGS_verify_checksum = n;
1303 1304 1305
    } else if (sscanf(argv[i], "--bufferedio=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      useOsBuffer = n;
1306 1307 1308
    } else if (sscanf(argv[i], "--mmap_read=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      useMmapRead = n;
1309 1310 1311
    } else if (sscanf(argv[i], "--mmap_write=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      useMmapWrite = n;
1312
    } else if (sscanf(argv[i], "--readahead=%d%c", &n, &junk) == 1 &&
1313 1314
               (n == 0 || n == 1)) {
      useFsReadAhead = n;
1315 1316 1317 1318 1319 1320
    } else if (sscanf(argv[i], "--statistics=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      if (n == 1) {
        dbstats = new leveldb::DBStatistics();
        FLAGS_statistics = true;
      }
1321 1322 1323 1324 1325
    } else if (sscanf(argv[i], "--writes=%d%c", &n, &junk) == 1) {
      FLAGS_writes = n;
    } else if (sscanf(argv[i], "--sync=%d%c", &n, &junk) == 1 &&
               (n == 0 || n == 1)) {
      FLAGS_sync = n;
1326
    } else if (sscanf(argv[i], "--readwritepercent=%d%c", &n, &junk) == 1 &&
1327
               n > 0 && n < 100) {
1328
      FLAGS_readwritepercent = n;
H
heyongqiang 已提交
1329 1330 1331
    } else if (sscanf(argv[i], "--disable_data_sync=%d%c", &n, &junk) == 1 &&
        (n == 0 || n == 1)) {
      FLAGS_disable_data_sync = n;
1332 1333 1334
    } else if (sscanf(argv[i], "--use_fsync=%d%c", &n, &junk) == 1 &&
        (n == 0 || n == 1)) {
      FLAGS_use_fsync = n;
H
heyongqiang 已提交
1335
    } else if (sscanf(argv[i], "--disable_wal=%d%c", &n, &junk) == 1 &&
1336
        (n == 0 || n == 1)) {
H
heyongqiang 已提交
1337
      FLAGS_disable_wal = n;
1338
    } else if (sscanf(argv[i], "--hdfs=%s", hdfsname) == 1) {
1339
      FLAGS_env  = new leveldb::HdfsEnv(hdfsname);
1340 1341 1342
    } else if (sscanf(argv[i], "--num_levels=%d%c",
        &n, &junk) == 1) {
      FLAGS_num_levels = n;
H
heyongqiang 已提交
1343 1344 1345 1346 1347 1348 1349
    } else if (sscanf(argv[i], "--target_file_size_base=%d%c",
        &n, &junk) == 1) {
      FLAGS_target_file_size_base = n;
    } else if ( sscanf(argv[i], "--target_file_size_multiplier=%d%c",
        &n, &junk) == 1) {
      FLAGS_target_file_size_multiplier = n;
    } else if (
1350 1351
        sscanf(argv[i], "--max_bytes_for_level_base=%ld%c", &l, &junk) == 1) {
      FLAGS_max_bytes_for_level_base = l;
H
heyongqiang 已提交
1352 1353 1354
    } else if (sscanf(argv[i], "--max_bytes_for_level_multiplier=%d%c",
        &n, &junk) == 1) {
      FLAGS_max_bytes_for_level_multiplier = n;
H
heyongqiang 已提交
1355 1356 1357 1358 1359 1360
    } else if (sscanf(argv[i],"--level0_stop_writes_trigger=%d%c",
        &n, &junk) == 1) {
      FLAGS_level0_stop_writes_trigger = n;
    } else if (sscanf(argv[i],"--level0_slowdown_writes_trigger=%d%c",
        &n, &junk) == 1) {
      FLAGS_level0_slowdown_writes_trigger = n;
M
Mark Callaghan 已提交
1361 1362 1363
    } else if (sscanf(argv[i],"--level0_file_num_compaction_trigger=%d%c",
        &n, &junk) == 1) {
      FLAGS_level0_file_num_compaction_trigger = n;
1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376
    } else if (strncmp(argv[i], "--compression_type=", 19) == 0) {
      const char* ctype = argv[i] + 19;
      if (!strcasecmp(ctype, "none"))
        FLAGS_compression_type = leveldb::kNoCompression;
      else if (!strcasecmp(ctype, "snappy"))
        FLAGS_compression_type = leveldb::kSnappyCompression;
      else if (!strcasecmp(ctype, "zlib"))
        FLAGS_compression_type = leveldb::kZlibCompression;
      else if (!strcasecmp(ctype, "bzip2"))
        FLAGS_compression_type = leveldb::kBZip2Compression;
      else {
        fprintf(stdout, "Cannot parse %s\n", argv[i]);
      }
1377 1378
    } else if (sscanf(argv[i], "--min_level_to_compress=%d%c", &n, &junk) == 1
        && n >= 0) {
1379
      FLAGS_min_level_to_compress = n;
1380 1381 1382
    } else if (sscanf(argv[i], "--disable_seek_compaction=%d%c", &n, &junk) == 1
        && (n == 0 || n == 1)) {
      FLAGS_disable_seek_compaction = n;
1383 1384
    } else if (sscanf(argv[i], "--delete_obsolete_files_period_micros=%ld%c",
                      &l, &junk) == 1) {
1385
      FLAGS_delete_obsolete_files_period_micros = l;
1386 1387 1388
    } else if (sscanf(argv[i], "--stats_interval=%d%c", &n, &junk) == 1 &&
               n >= 0 && n < 2000000000) {
      FLAGS_stats_interval = n;
1389 1390 1391 1392 1393 1394
    } else if (sscanf(argv[i], "--stats_per_interval=%d%c", &n, &junk) == 1
        && (n == 0 || n == 1)) {
      FLAGS_stats_per_interval = n;
    } else if (sscanf(argv[i], "--rate_limit=%lf%c", &d, &junk) == 1 &&
               d > 1.0) {
      FLAGS_rate_limit = d;
H
heyongqiang 已提交
1395 1396 1397
    } else if (sscanf(argv[i], "--readonly=%d%c", &n, &junk) == 1 &&
        (n == 0 || n ==1 )) {
      FLAGS_read_only = n;
1398
    } else {
J
jorlow@chromium.org 已提交
1399 1400 1401 1402 1403
      fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
      exit(1);
    }
  }

H
heyongqiang 已提交
1404 1405 1406 1407 1408 1409 1410
  // Choose a location for the test database if none given with --db=<path>
  if (FLAGS_db == NULL) {
      leveldb::Env::Default()->GetTestDirectory(&default_db_path);
      default_db_path += "/dbbench";
      FLAGS_db = default_db_path.c_str();
  }

J
jorlow@chromium.org 已提交
1411 1412 1413 1414
  leveldb::Benchmark benchmark;
  benchmark.Run();
  return 0;
}