[bugfix] fix typo and add bench.py for quick benchmark

fe6e8de9 · guokuankuan · Roy Guo · a39b5cbb · fe6e8de9 · fe6e8de9
隐藏空白更改
内联并排

Showing 3 changed files with 203 additions and 6 deletions

bench.py bench.py +198 -0

build.sh build.sh +1 -1

tools/db_bench_tool.cc tools/db_bench_tool.cc +4 -5

未找到文件。
--- a/bench.py
+++ b/bench.py
+#!/usr/bin/python
+#
+# Copyright (c) 2020-present, Bytedance Inc.  All rights reserved.
+# This source code is licensed under Apache 2.0 License.
+#
+# Usage:
+#       ./bench.py $PWD/data 20 10 24,2000 readrandomwriterandom,10
+#
+#       ./bench.py $PWD/data 20 10 24,2000 readrandomwriterandom,90
+#
+import time
+import io
+import os
+import subprocess
+import sys
+import requests
+from datetime import datetime
+
+# default values
+# Every setting below except TOTAL_MEM_IN_GB and BENCH_ENGINES is
+# reassigned from the command line in the __main__ section.
+# "key size, value size" pair as a single comma-separated string
+KV_SIZE = "24, 500"
+KSIZE = 24  # key size from KV_SIZE
+VSIZE = 500 # value size from KV_SIZE
+# amount of data, in GB, used by run() to derive the record count
+GB_PER_THREAD = 20
+# NOTE(review): not referenced anywhere else in this script
+TOTAL_MEM_IN_GB = 64
+# passed to db_bench as --threads
+THREADS = 16
+# prefix of the database directory; the engine name is appended to it
+DB_DIR = ""
+# passed to db_bench as --benchmarks
+BENCH_TYPE = "fillseq"
+# extra arguments of the benchmark (the part after the first comma of BENCH_STR)
+BENCH_ARGS = []
+
+# collected result log
+LOG_RESULT_FNAME = "log.txt"
+# bench rocksdb output
+LOG_BENCH_OUTPUT_FNAME = "output.txt"
+
+# maps an engine name to the db_bench binary used to benchmark it
+BENCH_ENGINES = {'terarkdb':'./output/db_bench'}
+
+def bench(records, key_size, value_size, engine, db_dir, exist_db):
+    """Run one db_bench invocation and capture its output in a log file.
+
+    The command line is assembled from the arguments plus the module-level
+    settings BENCH_TYPE, BENCH_ARGS, THREADS and BENCH_ENGINES. It is written
+    as the first line of LOG_BENCH_OUTPUT_FNAME and then executed with both
+    stdout and stderr redirected into that same file.
+
+    Args:
+        records: value passed as --num.
+        key_size: value passed as --key_size.
+        value_size: value passed as --value_size.
+        engine: key of BENCH_ENGINES selecting the db_bench binary.
+        db_dir: directory passed as both --db and --wal_dir.
+        exist_db: value passed as --use_existing_db.
+
+    Raises:
+        KeyError: if engine is not a key of BENCH_ENGINES.
+    """
+    # The command is kept as an argument list (no shell involved) so that a
+    # db_dir containing spaces or shell metacharacters reaches db_bench intact.
+    args = [
+        BENCH_ENGINES[engine],
+        '--benchmarks=%s' % BENCH_TYPE,
+        '--use_existing_db=%s' % exist_db,
+        '--sync=1',
+        '--db=%s' % db_dir,
+        '--wal_dir=%s' % db_dir,
+        '--bytes_per_sync=65536',
+        '--wal_bytes_per_sync=65536',
+        '--num=%s' % records,
+        '--threads=%s' % THREADS,
+        '--num_levels=6',
+        '--delayed_write_rate=209715200',
+        '--key_size=%s' % key_size,
+        '--value_size=%s' % value_size,
+        '--cache_numshardbits=6',
+        '--level_compaction_dynamic_level_bytes=true',
+        '--cache_index_and_filter_blocks=1',
+        '--pin_l0_filter_and_index_blocks_in_cache=0',
+        '--benchmark_write_rate_limit=0',
+        '--hard_rate_limit=3',
+        '--rate_limit_delay_max_milliseconds=1000000',
+        '--write_buffer_size=268435456',
+        '--max_write_buffer_number=6',
+        '--target_file_size_base=134217728',
+        '--max_bytes_for_level_base=536870912',
+        '--verify_checksum=1',
+        '--delete_obsolete_files_period_micros=62914560',
+        '--max_bytes_for_level_multiplier=10',
+        '--statistics=0',
+        '--stats_per_interval=1',
+        '--stats_interval_seconds=60',
+        '--histogram=1',
+        '--open_files=-1',
+        '--level0_file_num_compaction_trigger=4',
+        '--level0_slowdown_writes_trigger=1000',
+        '--level0_stop_writes_trigger=1000',
+        '--num_high_pri_threads=3',
+        '--num_low_pri_threads=10',
+        '--mmap_read=true',
+        '--compression_type=none',
+        '--memtablerep=skip_list',
+    ]
+
+    # Engine-specific flags.
+    if engine == 'terarkdb':
+        args.append('--use_terark_table=false')
+        args.append('--blob_size=128')
+
+    # The read percentage is the first extra benchmark argument. When it is
+    # missing the flag is left out instead of failing with an IndexError.
+    if BENCH_TYPE == 'readrandomwriterandom' and BENCH_ARGS:
+        args.append('--readwritepercent=%s' % BENCH_ARGS[0])
+
+    # The context manager closes the log even if starting the process fails.
+    with open(LOG_BENCH_OUTPUT_FNAME, 'wb') as log:
+        # Record the exact command on a line of its own so the first line of
+        # db_bench output is not glued onto it.
+        log.write(' '.join(args) + '\n')
+        # Flush before the child starts writing to the same file descriptor.
+        log.flush()
+        process = subprocess.Popen(args,
+                                   stdin=subprocess.PIPE,
+                                   stderr=log,
+                                   stdout=log)
+        process.communicate()
+
+    if process.returncode != 0:
+        print 'warning: db_bench exited with status %s' % process.returncode
+    print 'test finished: %s\n' % LOG_BENCH_OUTPUT_FNAME
+
+
+def run(engine, db_dir):
+    """Derive the record count from the module settings and run one benchmark.
+
+    The record count is GB_PER_THREAD gigabytes (1024**3 bytes each) divided
+    by the size of one key/value pair, KSIZE + VSIZE bytes. The benchmark is
+    started with use_existing_db set to 0.
+
+    Args:
+        engine: key of BENCH_ENGINES, forwarded to bench().
+        db_dir: database directory, forwarded to bench().
+    """
+    db_size_bytes = int(GB_PER_THREAD) * 1024 * 1024 * 1024
+    # Explicit floor division: the record count must stay an integer and must
+    # not depend on the interpreter's meaning of "/".
+    records = db_size_bytes // (KSIZE + VSIZE)
+    bench(records, KSIZE, VSIZE, engine, db_dir, 0)
+
+
+def gather_result(engine):
+    """Parse the db_bench log and append a summary table to the result file.
+
+    Reads LOG_BENCH_OUTPUT_FNAME, takes the ops figure from the line that
+    starts with BENCH_TYPE and the max latency / percentile text from every
+    'Microseconds per <op>:' histogram. It then appends a header row plus one
+    row for each of the 'read' and 'write' histograms found to
+    LOG_RESULT_FNAME. Truncated or malformed histogram sections are skipped
+    instead of raising.
+
+    Args:
+        engine: unused; kept so existing callers keep working.
+    """
+    with open(LOG_BENCH_OUTPUT_FNAME, 'rb') as f:
+        lines = f.readlines()
+
+    histogram_tag = 'Microseconds per '
+    stats = {}
+    i = 0
+    while i < len(lines):
+        line = lines[i]
+
+        # Summary line: starts with the benchmark name and carries the ops
+        # figure in its fifth whitespace-separated field.
+        if line.startswith(BENCH_TYPE):
+            fields = line.split()
+            if len(fields) > 4:
+                stats['ops'] = fields[4]
+
+        # Histogram header: the max value is read from two lines below it and
+        # the percentile text from three lines below it.
+        if histogram_tag in line and i + 3 < len(lines):
+            max_fields = lines[i + 2].split()
+            if len(max_fields) > 5:
+                # Drop the tag prefix and the last two characters of the
+                # header line to get the operation name.
+                ops_type = line[len(histogram_tag):-2]
+                stats[ops_type] = {
+                    'max': max_fields[5],
+                    # Skip the 13-character label at the start of the line
+                    # and its last two characters.
+                    'percentiles': lines[i + 3][13:-2],
+                }
+                # Jump past the histogram lines that were just consumed.
+                i += 5
+                continue
+
+        i += 1
+
+    output = [('benchmark', 'kv bytes', 'ops', 'operation', 'max lat(us)', 'pct(us)')]
+    for op in ('read', 'write'):
+        if op in stats:
+            # 'N/A' stands in when the log contained no summary line.
+            output.append((BENCH_TYPE, KV_SIZE, stats.get('ops', 'N/A'), op,
+                           stats[op]['max'], stats[op]['percentiles']))
+
+    with open(LOG_RESULT_FNAME, 'a') as f:
+        for row in output:
+            f.write('{0:<25} {1:<15} {2:<15} {3:<15} {4:<15} {5:<100}\n'.format(*row))
+
+if __name__=='__main__':
+    # Stop early, with a failing exit status, when the db_bench binary is missing.
+    if not os.path.isfile(BENCH_ENGINES['terarkdb']):
+        print 'db_bench not found, please check: %s' % BENCH_ENGINES['terarkdb']
+        sys.exit(1)
+
+    # Exactly five positional arguments are required.
+    if len(sys.argv) != 6:
+        print 'usage: ./bench.py [DB_DIR] [GB_PER_THREAD] [THREADS] [KV_SIZE] [BENCH_STR]\n'
+        print '\t\tKV_SIZE: 24,500 means key size is 24, value size is 500'
+        print '\t\tBENCH_STR: fillseq or readrandomwriterandom,90, the later one means 90% reads'
+        sys.exit(1)
+
+    # Override the module-level defaults; bench(), run() and gather_result()
+    # read these globals.
+    DB_DIR = sys.argv[1]
+    GB_PER_THREAD = sys.argv[2]
+    THREADS = sys.argv[3]
+    KV_SIZE = sys.argv[4]
+    KSIZE, VSIZE = [int(i) for i in KV_SIZE.split(',')]
+
+    # BENCH_STR is "<benchmark>[,<arg>...]".
+    BENCH_STR = sys.argv[5].split(",")
+    BENCH_TYPE = BENCH_STR[0]
+    BENCH_ARGS = BENCH_STR[1:]
+    print 'bench_type = %s, bench_args = %s' % (BENCH_TYPE, BENCH_ARGS)
+
+    LOG_RESULT_FNAME = 'rst_%s_gb_%s_thds.txt' % (GB_PER_THREAD, THREADS)
+
+    for engine in ['terarkdb']:
+        LOG_BENCH_OUTPUT_FNAME = "output_%s_%s_%s_%s.txt" % (engine, BENCH_TYPE, KSIZE, VSIZE)
+
+        # Each engine gets its own database directory.
+        db_dir = '%s_%s' %  (DB_DIR, engine)
+        print 'start engine : %s, db_dir = %s' % (engine, db_dir)
+
+        # Timestamped header describing this run, written before the results.
+        with open(LOG_RESULT_FNAME, 'a') as f:
+            f.write('[%s] GB_PER_THREAD: %s, THREADS = %s, KV_SIZE = %s, BENCH_STR = %s, DATA_DIR = %s\n' % (datetime.now().strftime("%Y-%m-%d %H:%M:%S"), GB_PER_THREAD, THREADS, KV_SIZE, BENCH_STR, db_dir) )
+
+        run(engine, db_dir)
+        gather_result(engine)
+
+        # Blank lines separate consecutive runs in the result file.
+        with open(LOG_RESULT_FNAME, 'a') as f:
+            f.write('\n\n')
--- a/build.sh
+++ b/build.sh
@@ -6,5 +6,5 @@ mkdir -p $OUTPUT

 git submodule update --init --recursive

-cd $BASE/$OUTPUT && cmake ../ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DWITH_TESTS=OFF -DWITH_TOOLS=ON
+cd $BASE/$OUTPUT && cmake ../ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DWITH_TESTS=OFF -DWITH_TOOLS=ON -DWITH_TERARK_ZIP=OFF
 cd $BASE/$OUTPUT && make -j $(nproc) && make install
--- a/tools/db_bench_tool.cc
+++ b/tools/db_bench_tool.cc
@@ -1084,7 +1084,7 @@ static enum RepFactory StringToRepFactory(const char* ctype) {
    return kHashLinkedList;
  else if (!strcasecmp(ctype, "cuckoo"))
    return kCuckoo;
-  else if (!strcasecmp(ctype, "patricil_trie"))
+  else if (!strcasecmp(ctype, "patricia_trie"))
    return kPatriciaTrie;

  fprintf(stdout, "Cannot parse memreptable %s\n", ctype);
@@ -2162,7 +2162,7 @@ class Benchmark {
        fprintf(stdout, "Memtablerep: cuckoo\n");
        break;
      case kPatriciaTrie:
-        fprintf(stdout, "Memtablerep: patricil_trie\n");
+        fprintf(stdout, "Memtablerep: patricia_trie\n");
        break;
    }
    fprintf(stdout, "Perf Level: %d\n", FLAGS_perf_level);
@@ -3268,9 +3268,8 @@ class Benchmark {
            options.write_buffer_size, FLAGS_key_size + FLAGS_value_size));
        break;
      case kPatriciaTrie:
-        fprintf(stderr, "PatriciaTrie is unsupported now\n");
-        // options.memtable_factory.reset(NewPatriciaTrieRepFactory());
-        // break;
+        options.memtable_factory.reset(NewPatriciaTrieRepFactory());
+        break;
 #else
      default:
        fprintf(stderr, "Only skip list is supported in lite mode\n");