benchmark.sh 11.0 KB
Newer Older
1 2 3 4
#!/bin/bash
# REQUIRE: db_bench binary exists in the current directory

# Exactly one argument is required. The driver loop at the bottom of this
# file splits it on commas into job names.
# A usage error is reported on stderr with a non-zero status so that callers
# can tell it apart from a successful run.
if [[ $# -ne 1 ]]; then
  {
    echo -n "./benchmark.sh [bulkload/fillseq/overwrite/filluniquerandom/"
    echo    "readrandom/readwhilewriting/readwhilemerging/updaterandom/mergerandom]"
  } >&2
  exit 1
fi

# Binary size multipliers: each one is 1024 times the previous one.
K=$((1 << 10))
M=$((K << 10))
G=$((M << 10))

# DB_DIR and WAL_DIR must come from the environment; they are later passed to
# db_bench as --db and --wal_dir. Quoting keeps the tests well-formed for any
# value, and a missing variable is reported on stderr with a non-zero status.
if [[ -z "${DB_DIR:-}" ]]; then
  echo "DB_DIR is not defined" >&2
  exit 1
fi

if [[ -z "${WAL_DIR:-}" ]]; then
  echo "WAL_DIR is not defined" >&2
  exit 1
fi

# Directory that receives the per-test logs, report.txt and schedule.txt.
# Defaults to /tmp/ and is created when it does not exist yet.
output_dir=${OUTPUT_DIR:-/tmp/}
if [[ ! -d "$output_dir" ]]; then
  mkdir -p -- "$output_dir"
fi

30 31 32 33 34 35 36 37
# all multithreaded tests run with sync=1 unless
# $DB_BENCH_NO_SYNC is defined (set to any non-empty value)
syncval="1"
if [[ -n "${DB_BENCH_NO_SYNC:-}" ]]; then
  echo "Turning sync off for all multithreaded tests"
  syncval="0"
fi

38 39 40 41 42 43
# Workload tunables. Each lower-case setting takes its value from the
# upper-case environment variable of the same name when that is set and
# non-empty, and falls back to the default shown otherwise. key_size is fixed.
num_threads=${NUM_THREADS:-16}
# Only for *whilewriting, *whilemerging
writes_per_second=${WRITES_PER_SECOND:-$((10 * K))}
# Only for tests that do range scans
num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10}
cache_size=${CACHE_SIZE:-$((1 * G))}
# 0 means no --duration flag is added to the db_bench parameters.
duration=${DURATION:-0}

num_keys=${NUM_KEYS:-$((1 * G))}
key_size=20
value_size=${VALUE_SIZE:-400}
block_size=${BLOCK_SIZE:-4096}
50 51 52 53

const_params="
  --db=$DB_DIR \
  --wal_dir=$WAL_DIR \
54
  --disable_data_sync=0 \
55
  \
56
  --num=$num_keys \
57 58 59
  --num_levels=6 \
  --key_size=$key_size \
  --value_size=$value_size \
M
Mark Callaghan 已提交
60
  --block_size=$block_size \
61 62
  --cache_size=$cache_size \
  --cache_numshardbits=6 \
L
Lei Jin 已提交
63
  --compression_type=zlib \
64
  --min_level_to_compress=3 \
65
  --compression_ratio=0.5 \
M
Mark Callaghan 已提交
66 67
  --level_compaction_dynamic_level_bytes=true \
  --bytes_per_sync=$((2 * M)) \
68
  \
69
  --hard_rate_limit=3 \
70 71
  --rate_limit_delay_max_milliseconds=1000000 \
  --write_buffer_size=$((128 * M)) \
72
  --max_write_buffer_number=8 \
73 74 75 76 77
  --target_file_size_base=$((128 * M)) \
  --max_bytes_for_level_base=$((1 * G)) \
  \
  --verify_checksum=1 \
  --delete_obsolete_files_period_micros=$((60 * M)) \
78 79
  --max_grandparent_overlap_factor=8 \
  --max_bytes_for_level_multiplier=8 \
80 81 82
  \
  --statistics=1 \
  --stats_per_interval=1 \
83
  --stats_interval_seconds=60 \
84 85 86 87 88 89 90
  --histogram=1 \
  \
  --memtablerep=skip_list \
  --bloom_bits=10 \
  --open_files=$((20 * K))"

# Level-0 compaction/slowdown/stop triggers. These are added to params_w but
# not to params_bulkload, which sets its own values for the same flags.
l0_config="
  --level0_file_num_compaction_trigger=4 \
  --level0_slowdown_writes_trigger=12 \
  --level0_stop_writes_trigger=20"
94 95 96 97 98

# A positive duration is forwarded to db_bench; 0 (the default) adds no flag.
if [[ "${duration:-0}" -gt 0 ]]; then
  const_params="$const_params --duration=$duration"
fi

# params_w:        common flags plus the regular level-0 triggers.
# params_bulkload: common flags with all three level-0 triggers set to 10 * M.
params_w="$const_params $l0_config --max_background_compactions=16 --max_background_flushes=7"
params_bulkload="$const_params --max_background_compactions=16 --max_background_flushes=7 \
                 --level0_file_num_compaction_trigger=$((10 * M)) \
                 --level0_slowdown_writes_trigger=$((10 * M)) \
                 --level0_stop_writes_trigger=$((10 * M))"
104

105 106 107 108 109 110 111 112 113 114 115 116
#######################################
# Pull the headline numbers out of one db_bench log and append them as a
# single tab-separated row to $output_dir/report.txt.
# Globals:   output_dir (read)
# Arguments: $1 - path of the db_bench log to parse
#            $2 - label written in the last column of the row
#            $3 - benchmark name; lines starting with it supply ops/sec,
#                 MB/sec and usecs/op
# Outputs:   one row appended to $output_dir/report.txt, columns in order:
#            ops_sec mb_sec sum_size lo_wgb sum_wgb wamp wmb_ps usecs_op
#            p50 p75 p99 p999 p9999 uptime stall_time stall_pct test_name
#######################################
function summarize_result {
  local test_out=$1
  local test_name=$2
  local bench_name=$3
  local uptime stall_time stall_pct ops_sec mb_sec lo_wgb sum_wgb sum_size
  local wamp wmb_ps usecs_op p50 p75 p99 p999 p9999

  # For statistics that are printed repeatedly only the last occurrence is used.
  uptime=$( grep '^Uptime(secs' "$test_out" | tail -1 | awk '{ printf "%.0f", $2 }' )
  stall_time=$( grep "^Cumulative stall" "$test_out" | tail -1 | awk '{ print $3 }' )
  stall_pct=$( grep "^Cumulative stall" "$test_out" | tail -1 | awk '{ print $5 }' )
  ops_sec=$( grep "^${bench_name}" "$test_out" | awk '{ print $5 }' )
  mb_sec=$( grep "^${bench_name}" "$test_out" | awk '{ print $7 }' )
  lo_wgb=$( grep "^  L0" "$test_out" | tail -1 | awk '{ print $8 }' )
  sum_wgb=$( grep "^ Sum" "$test_out" | tail -1 | awk '{ print $8 }' )
  sum_size=$( grep "^ Sum" "$test_out" | tail -1 | awk '{ printf "%.1f", $3 / 1024.0 }' )
  # Write amplification: field 8 of the " Sum" row over field 8 of the "  L0" row.
  wamp=$( echo "scale=1; $sum_wgb / $lo_wgb" | bc )
  # Write rate: the " Sum" value scaled by 1024 and divided by the uptime.
  wmb_ps=$( echo "scale=1; ( $sum_wgb * 1024.0 ) / $uptime" | bc )
  usecs_op=$( grep "^${bench_name}" "$test_out" | awk '{ printf "%.1f", $3 }' )
  p50=$( grep "^Percentiles:" "$test_out" | awk '{ printf "%.1f", $3 }' )
  p75=$( grep "^Percentiles:" "$test_out" | awk '{ printf "%.1f", $5 }' )
  p99=$( grep "^Percentiles:" "$test_out" | awk '{ printf "%.0f", $7 }' )
  p999=$( grep "^Percentiles:" "$test_out" | awk '{ printf "%.0f", $9 }' )
  p9999=$( grep "^Percentiles:" "$test_out" | awk '{ printf "%.0f", $11 }' )

  # The redirect must stay attached to this echo, otherwise the row only goes
  # to stdout and report.txt stays empty.
  echo -e "$ops_sec\t$mb_sec\t$sum_size\t$lo_wgb\t$sum_wgb\t$wamp\t$wmb_ps\t$usecs_op\t$p50\t$p75\t$p99\t$p999\t$p9999\t$uptime\t$stall_time\t$stall_pct\t$test_name" \
    >> "$output_dir/report.txt"
}

130
#######################################
# Load the database with fillrandom (auto compactions disabled), summarize
# that run, then run the db_bench "compact" benchmark on the loaded database.
# Globals:   num_keys, params_bulkload, params_w, output_dir (read)
# Arguments: none
# Outputs:   $output_dir/benchmark_bulkload_fillrandom.log and
#            $output_dir/benchmark_bulkload_compact.log; one report row for
#            the load step via summarize_result
#######################################
function run_bulkload {
  local cmd
  local load_log="$output_dir/benchmark_bulkload_fillrandom.log"
  local compact_log="$output_dir/benchmark_bulkload_compact.log"

  # This runs with a vector memtable and the WAL disabled to load faster. It is still crash safe and the
  # client can discover where to restart a load after a crash. I think this is a good way to load.
  echo "Bulk loading $num_keys random keys"
  cmd="./db_bench --benchmarks=fillrandom \
       --use_existing_db=0 \
       --disable_auto_compactions=1 \
       --sync=0 \
       $params_bulkload \
       --threads=1 \
       --memtablerep=vector \
       --disable_wal=1 \
       --seed=$( date +%s ) \
       2>&1 | tee -a $load_log"
  # Left unquoted on purpose: word splitting collapses the padding inside cmd
  # so the command is logged on one compact line. This also truncates the log.
  # shellcheck disable=SC2086
  echo $cmd | tee "$load_log"
  # cmd carries its own "2>&1 | tee -a" pipeline, so it has to go through eval.
  eval "$cmd"
  summarize_result "$load_log" bulkload fillrandom

  echo "Compacting..."
  cmd="./db_bench --benchmarks=compact \
       --use_existing_db=1 \
       --disable_auto_compactions=1 \
       --sync=0 \
       $params_w \
       --threads=1 \
       2>&1 | tee -a $compact_log"
  # shellcheck disable=SC2086
  echo $cmd | tee "$compact_log"
  eval "$cmd"
}

#######################################
# Load the database sequentially with the fillseq benchmark and summarize it.
# Globals:   num_keys, params_w, value_size, output_dir (read)
# Arguments: none
# Outputs:   $output_dir/benchmark_fillseq.v<value_size>.log and one report
#            row via summarize_result
#######################################
function run_fillseq {
  local cmd
  local log="$output_dir/benchmark_fillseq.v${value_size}.log"

  # This runs with a vector memtable and the WAL disabled to load faster. It is still crash safe and the
  # client can discover where to restart a load after a crash. I think this is a good way to load.
  echo "Loading $num_keys keys sequentially"
  # --min_level_to_compress=0 comes after $params_w, which already carries
  # --min_level_to_compress=3.
  cmd="./db_bench --benchmarks=fillseq \
       --use_existing_db=0 \
       --sync=0 \
       $params_w \
       --min_level_to_compress=0 \
       --threads=1 \
       --memtablerep=vector \
       --disable_wal=1 \
       --seed=$( date +%s ) \
       2>&1 | tee -a $log"
  # Left unquoted on purpose: word splitting collapses the padding inside cmd
  # so the command is logged on one compact line. This also truncates the log.
  # shellcheck disable=SC2086
  echo $cmd | tee "$log"
  # cmd carries its own "2>&1 | tee -a" pipeline, so it has to go through eval.
  eval "$cmd"
  summarize_result "$log" "fillseq.v${value_size}" fillseq
}

178 179 180 181 182
#######################################
# Run one write-style benchmark against the existing database and summarize it.
# Globals:   num_keys, num_threads, syncval, params_w, output_dir (read)
# Arguments: $1 - db_bench benchmark name (the driver passes overwrite,
#                 updaterandom or mergerandom)
# Outputs:   $output_dir/benchmark_<op>.t<threads>.s<sync>.log and one report
#            row via summarize_result
#######################################
function run_change {
  local operation=$1
  local out_name="benchmark_${operation}.t${num_threads}.s${syncval}.log"
  local cmd

  echo "Do $num_keys random $operation"
  cmd="./db_bench --benchmarks=$operation \
       --use_existing_db=1 \
       --sync=$syncval \
       $params_w \
       --threads=$num_threads \
       --merge_operator=\"put\" \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/${out_name}"
  # Left unquoted on purpose: word splitting collapses the padding inside cmd
  # so the command is logged on one compact line. This also truncates the log.
  # shellcheck disable=SC2086
  echo $cmd | tee "$output_dir/${out_name}"
  # cmd carries its own "2>&1 | tee -a" pipeline, so it has to go through eval.
  eval "$cmd"
  summarize_result "$output_dir/${out_name}" "${operation}.t${num_threads}.s${syncval}" "$operation"
}

#######################################
# Load a fresh database with the filluniquerandom benchmark and summarize it.
# Globals:   num_keys, params_w, output_dir (read)
# Arguments: none
# Outputs:   $output_dir/benchmark_filluniquerandom.log and one report row
#            via summarize_result
#######################################
function run_filluniquerandom {
  local cmd
  local log="$output_dir/benchmark_filluniquerandom.log"

  echo "Loading $num_keys unique keys randomly"
  cmd="./db_bench --benchmarks=filluniquerandom \
       --use_existing_db=0 \
       --sync=0 \
       $params_w \
       --threads=1 \
       --seed=$( date +%s ) \
       2>&1 | tee -a $log"
  # Left unquoted on purpose: word splitting collapses the padding inside cmd
  # so the command is logged on one compact line. This also truncates the log.
  # shellcheck disable=SC2086
  echo $cmd | tee "$log"
  # cmd carries its own "2>&1 | tee -a" pipeline, so it has to go through eval.
  eval "$cmd"
  summarize_result "$log" filluniquerandom filluniquerandom
}

#######################################
# Run the readrandom benchmark against the existing database and summarize it.
# Globals:   num_keys, num_threads, params_w, output_dir (read)
# Arguments: none
# Outputs:   $output_dir/benchmark_readrandom.t<threads>.log and one report
#            row via summarize_result
#######################################
function run_readrandom {
  local out_name="benchmark_readrandom.t${num_threads}.log"
  local cmd

  echo "Reading $num_keys random keys"
  cmd="./db_bench --benchmarks=readrandom \
       --use_existing_db=1 \
       $params_w \
       --threads=$num_threads \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/${out_name}"
  # Left unquoted on purpose: word splitting collapses the padding inside cmd
  # so the command is logged on one compact line. This also truncates the log.
  # shellcheck disable=SC2086
  echo $cmd | tee "$output_dir/${out_name}"
  # cmd carries its own "2>&1 | tee -a" pipeline, so it has to go through eval.
  eval "$cmd"
  summarize_result "$output_dir/${out_name}" "readrandom.t${num_threads}" readrandom
}

223 224 225 226 227
#######################################
# Run a readwhile<operation> benchmark against the existing database and
# summarize it.
# Globals:   num_keys, num_threads, syncval, writes_per_second, params_w,
#            output_dir (read)
# Arguments: $1 - suffix appended to "readwhile" (the driver passes writing
#                 or merging)
# Outputs:   $output_dir/benchmark_readwhile<op>.t<threads>.log and one
#            report row via summarize_result
#######################################
function run_readwhile {
  local operation=$1
  local out_name="benchmark_readwhile${operation}.t${num_threads}.log"
  local cmd

  echo "Reading $num_keys random keys while $operation"
  cmd="./db_bench --benchmarks=readwhile${operation} \
       --use_existing_db=1 \
       --sync=$syncval \
       $params_w \
       --threads=$num_threads \
       --writes_per_second=$writes_per_second \
       --merge_operator=\"put\" \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/${out_name}"
  # Left unquoted on purpose: word splitting collapses the padding inside cmd
  # so the command is logged on one compact line. This also truncates the log.
  # shellcheck disable=SC2086
  echo $cmd | tee "$output_dir/${out_name}"
  # cmd carries its own "2>&1 | tee -a" pipeline, so it has to go through eval.
  eval "$cmd"
  summarize_result "$output_dir/${out_name}" "readwhile${operation}.t${num_threads}" "readwhile${operation}"
}

241 242 243 244 245 246 247
#######################################
# Run a seekrandomwhile<operation> range-scan benchmark against the existing
# database and summarize it.
# Globals:   num_keys, num_threads, syncval, writes_per_second,
#            num_nexts_per_seek, params_w, output_dir (read)
# Arguments: $1 - suffix appended to "seekrandomwhile" (writing or merging)
#            $2 - name used for the log file and the report label
#            $3 - value passed to --reverse_iterator (true or false)
# Outputs:   $output_dir/benchmark_<name>.t<threads>.log and one report row
#            via summarize_result
#######################################
function run_rangewhile {
  local operation=$1
  local full_name=$2
  local reverse_arg=$3
  local out_name="benchmark_${full_name}.t${num_threads}.log"
  local cmd

  echo "Range scan $num_keys random keys while ${operation} for reverse_iter=${reverse_arg}"
  cmd="./db_bench --benchmarks=seekrandomwhile${operation} \
       --use_existing_db=1 \
       --sync=$syncval \
       $params_w \
       --threads=$num_threads \
       --writes_per_second=$writes_per_second \
       --merge_operator=\"put\" \
       --seek_nexts=$num_nexts_per_seek \
       --reverse_iterator=$reverse_arg \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/${out_name}"
  # Left unquoted on purpose: word splitting collapses the padding inside cmd
  # so the command is logged on one compact line. This also truncates the log.
  # shellcheck disable=SC2086
  echo $cmd | tee "$output_dir/${out_name}"
  # cmd carries its own "2>&1 | tee -a" pipeline, so it has to go through eval.
  eval "$cmd"
  summarize_result "$output_dir/${out_name}" "${full_name}.t${num_threads}" "seekrandomwhile${operation}"
}

263 264 265 266 267 268
#######################################
# Run the seekrandom range-scan benchmark against the existing database and
# summarize it.
# Globals:   num_keys, num_threads, num_nexts_per_seek, params_w,
#            output_dir (read)
# Arguments: $1 - name used for the log file and the report label
#            $2 - value passed to --reverse_iterator (true or false)
# Outputs:   $output_dir/benchmark_<name>.t<threads>.log and one report row
#            via summarize_result
#######################################
function run_range {
  local full_name=$1
  local reverse_arg=$2
  local out_name="benchmark_${full_name}.t${num_threads}.log"
  local cmd

  echo "Range scan $num_keys random keys for reverse_iter=${reverse_arg}"
  cmd="./db_bench --benchmarks=seekrandom \
       --use_existing_db=1 \
       $params_w \
       --threads=$num_threads \
       --seek_nexts=$num_nexts_per_seek \
       --reverse_iterator=$reverse_arg \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/${out_name}"
  # Left unquoted on purpose: word splitting collapses the padding inside cmd
  # so the command is logged on one compact line. This also truncates the log.
  # shellcheck disable=SC2086
  echo $cmd | tee "$output_dir/${out_name}"
  # cmd carries its own "2>&1 | tee -a" pipeline, so it has to go through eval.
  eval "$cmd"
  summarize_result "$output_dir/${out_name}" "${full_name}.t${num_threads}" seekrandom
}

281 282 283 284 285
# Print the current time as whole seconds since the Unix epoch.
function now {
  date +%s
}

report="$output_dir/report.txt"
schedule="$output_dir/schedule.txt"

#######################################
# Run every job named in a comma-separated list, in the order given.
# Globals:   schedule, report (read); num_keys (written by the debug job)
# Arguments: $1 - comma-separated job names
# Outputs:   start/complete lines on stdout and appended to $schedule; after
#            each job the report header and the last row of $report on stdout
# Returns:   exits the script with status 1 on an unknown job name
#######################################
function run_jobs {
  local job_list job start end

  # The IFS assignment only applies to this read; it splits $1 on commas.
  IFS=',' read -r -a job_list <<< "$1"
  for job in "${job_list[@]}"; do

    if [[ "$job" != debug ]]; then
      echo "Start $job at $(date)" | tee -a "$schedule"
    fi

    start=$(now)
    case "$job" in
      bulkload)
        run_bulkload
        ;;
      fillseq)
        run_fillseq
        ;;
      overwrite | updaterandom | mergerandom)
        run_change "$job"
        ;;
      filluniquerandom)
        run_filluniquerandom
        ;;
      readrandom)
        run_readrandom
        ;;
      fwdrange)
        run_range "$job" false
        ;;
      revrange)
        run_range "$job" true
        ;;
      readwhilewriting)
        run_readwhile writing
        ;;
      readwhilemerging)
        run_readwhile merging
        ;;
      fwdrangewhilewriting)
        run_rangewhile writing "$job" false
        ;;
      revrangewhilewriting)
        run_rangewhile writing "$job" true
        ;;
      fwdrangewhilemerging)
        run_rangewhile merging "$job" false
        ;;
      revrangewhilemerging)
        run_rangewhile merging "$job" true
        ;;
      debug)
        # NOTE(review): const_params, params_w and params_bulkload were
        # expanded with the earlier num_keys, so this only changes the
        # progress messages, not the --num given to db_bench - confirm intent.
        num_keys=1000; # debug
        echo "Setting num_keys to $num_keys"
        ;;
      *)
        echo "unknown job $job"
        exit 1
        ;;
    esac
    end=$(now)

    if [[ "$job" != debug ]]; then
      echo "Complete $job in $((end-start)) seconds" | tee -a "$schedule"
    fi

    # The fourth column holds the same field of the "  L0" row that Sum_GB
    # takes from the " Sum" row, so both headers carry the same unit.
    echo -e "ops/sec\tmb/sec\tSize-GB\tL0_GB\tSum_GB\tW-Amp\tW-MB/s\tusec/op\tp50\tp75\tp99\tp99.9\tp99.99\tUptime\tStall-time\tStall%\tTest"
    tail -1 "$report"

  done
}

echo "===== Benchmark ====="

# Run!!!
run_jobs "${1:-}"