# db_crashtest.py
import os
import sys
import time
import shlex
import getopt
import logging
import subprocess

# This Python script repeatedly runs and kills db_stress with
# test_batches_snapshots ON,
# a small key space (max_key=1000) relative to the operations per thread, and
# a 50% read percentage.
# This checks consistency in case of unsafe crashes in RocksDB.

def _parse_options(argv):
    """Turn the command-line options into a dict of integer settings.

    Recognised options: -d (duration), -t (threads), -i (interval),
    -o (ops_per_thread), -b (write_buf_size) and -h (help).

    Prints the usage text and exits with status 2 on an unknown option, a
    missing option value, or a value that is not an integer; exits with
    status 0 after printing the usage text for -h.
    """
    usage = ("db_crashtest.py -d <duration_test> -t <#threads> "
             "-i <interval for one run> -o <ops_per_thread> "
             "-b <write_buffer_size>\n")

    # default values, will be overridden by cmdline args
    settings = {
        'interval': 120,  # time for one db_stress instance to run
        'duration': 6000,  # total time for this script to test db_stress
        'threads': 32,
        # since we will be killing anyway, use large value for ops_per_thread
        'ops_per_thread': 10000000,
        'write_buf_size': 4 * 1024 * 1024,
    }
    setting_for_flag = {
        '-d': 'duration',
        '-t': 'threads',
        '-i': 'interval',
        '-o': 'ops_per_thread',
        '-b': 'write_buf_size',
    }

    try:
        opts, _ = getopt.getopt(argv, "hd:t:i:o:b:")
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        try:
            settings[setting_for_flag[opt]] = int(arg)
        except ValueError:
            # A non-numeric value is a usage error, not a crash.
            print(usage)
            sys.exit(2)
    return settings


def _db_stress_command(ops_per_thread, threads, write_buf_size):
    """Return the db_stress invocation as an argv list (no shell involved)."""
    return [
        # No shell is used, so '~' has to be expanded here.
        os.path.expanduser('~/rocksdb/db_stress'),
        '--test_batches_snapshots=1',
        '--ops_per_thread=' + str(ops_per_thread),
        '--threads=' + str(threads),
        '--write_buffer_size=' + str(write_buf_size),
        '--reopen=0',
        '--readpercent=50',
        '--db=/tmp/rocksdb/crashtest',
        '--max_key=1000',
    ]


def main(argv):
    """Repeatedly start db_stress, kill it after a fixed interval, and check
    its stderr output.

    argv is the option list without the program name (see _parse_options for
    the accepted options).  Options are parsed before anything is built or
    run, so -h and usage errors return immediately.

    Each run lets db_stress work for `interval` seconds and then kills it.
    New runs are started until `duration` seconds have elapsed.  If a run
    wrote anything non-blank to stderr, every such line is printed and the
    script exits with status 2.
    """
    settings = _parse_options(argv)
    interval = settings['interval']

    if os.system("make -C ~/rocksdb db_stress") != 0:
        logging.warning("WARNING: building db_stress failed\n")

    command = _db_stress_command(settings['ops_per_thread'],
                                 settings['threads'],
                                 settings['write_buf_size'])
    exit_time = time.time() + settings['duration']

    while time.time() < exit_time:
        print("Running db_stress \n")
        os.system("mkdir -p /tmp/rocksdb/crashtest")
        killtime = time.time() + interval
        try:
            # Running the binary directly (no shell wrapper) means kill()
            # below reaches db_stress itself; universal_newlines gives text
            # output on both Python 2 and Python 3.
            child = subprocess.Popen(command, stderr=subprocess.PIPE,
                                     universal_newlines=True)
        except OSError as err:
            print('***' + str(err) + '^')
            sys.exit(2)

        time.sleep(interval)
        while time.time() <= killtime:
            time.sleep(1)

        if child.poll() is not None:
            logging.warning("WARNING: db_stress completed before kill\n")
        else:
            child.kill()
            print("KILLED \n")
            time.sleep(1)  # time to stabilize after a kill

        # communicate() drains stderr to EOF and reaps the child, so output
        # that follows a blank line is not lost.
        _, stderr_text = child.communicate()
        error_lines = [line.strip() for line in stderr_text.splitlines()
                       if line.strip()]
        for line in error_lines:
            print('***' + line + '^')
        if error_lines:
            sys.exit(2)

# Script entry point: pass the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)