# db_crashtest.py
import os
import sys
import time
import shlex
import getopt
import logging
import subprocess

# This Python script repeatedly runs and kills db_stress with
# --test_batches_snapshots turned on,
# total operations much less than the total keys, and
# a high read percentage.
# This checks consistency in case of unsafe crashes in RocksDB.

_USAGE = ("db_crashtest.py -d <duration_test> -t <#threads> "
          "-i <interval for one run> -o <ops_per_thread> "
          "-b <write_buffer_size>\n")


def _report_stderr(stream):
    """Print every non-blank line left in *stream*; return True if any."""
    saw_output = False
    # Read until EOF ('' from readline) so that a blank line in the middle
    # of the output does not hide the error lines that follow it.
    for raw_line in iter(stream.readline, ''):
        line = raw_line.strip()
        if line:
            saw_output = True
            print('***' + line + '^')
    return saw_output


def main(argv):
    """Run db_stress repeatedly, killing each run after a fixed interval.

    Each iteration starts db_stress against /tmp/rocksdb/crashtest, lets it
    run for ``interval`` seconds, kills it if it is still alive, and then
    treats any non-blank stderr output from the run as a failure.

    Args:
        argv: command-line arguments without the program name. Recognised
            options (all values are integers):
              -d  total time in seconds to keep testing (default 6000)
              -t  number of db_stress threads (default 32)
              -i  seconds each db_stress instance runs (default 120)
              -o  operations per thread (default 500000)
              -b  write buffer size in bytes (default 4 MiB)
              -h  print usage and exit

    Raises:
        SystemExit: with no status after ``-h``; with status 2 on invalid
            options or option values, when db_stress cannot be started, or
            when a run wrote anything to stderr.
    """
    try:
        opts, _ = getopt.getopt(argv, "hd:t:i:o:b:")
    except getopt.GetoptError:
        print(_USAGE)
        sys.exit(2)

    # default values, will be overridden by cmdline args
    interval = 120  # time for one db_stress instance to run
    duration = 6000  # total time for this script to test db_stress
    threads = 32
    ops_per_thread = 500000
    write_buf_size = 4 * 1024 * 1024

    # getopt has already rejected unknown options, so only the flags listed
    # in the option string can appear here.
    for opt, arg in opts:
        if opt == '-h':
            print(_USAGE)
            sys.exit()
        try:
            value = int(arg)
        except ValueError:
            # A non-numeric value is a usage error, not a crash.
            print(_USAGE)
            sys.exit(2)
        if opt == '-d':
            duration = value
        elif opt == '-t':
            threads = value
        elif opt == '-i':
            interval = value
        elif opt == '-o':
            ops_per_thread = value
        elif opt == '-b':
            write_buf_size = value

    # Build only after the arguments are known to be valid, so that -h and
    # usage errors do not trigger a compile.
    build_status = os.system("make -C ~/rocksdb db_stress")
    if build_status != 0:
        logging.warning("make db_stress exited with status %s", build_status)

    db_dir = "/tmp/rocksdb/crashtest"
    # Argument list (no shell) so that child.kill() signals db_stress itself
    # rather than an intermediate shell process.
    # NOTE(review): the '0' prefix on the --ops_per_thread and --threads
    # values is kept from the original command line; presumably harmless if
    # db_stress parses them as decimal -- confirm.
    command = [
        os.path.expanduser("~/rocksdb/db_stress"),
        "--test_batches_snapshots=1",
        "--ops_per_thread=0" + str(ops_per_thread),
        "--threads=0" + str(threads),
        "--write_buffer_size=" + str(write_buf_size),
        "--reopen=10",
        "--readpercent=50",
        "--db=" + db_dir,
        "--max_key=100",
    ]

    exit_time = time.time() + duration

    while time.time() < exit_time:
        print("Running db_stress \n")
        os.system("mkdir -p " + db_dir)
        killtime = time.time() + interval
        try:
            # universal_newlines=True yields text lines on both Python 2
            # and Python 3.
            # NOTE(review): stderr is only read after the run ends; a child
            # that fills the pipe before then would block -- confirm that
            # db_stress stays quiet on stderr during normal operation.
            child = subprocess.Popen(command, stderr=subprocess.PIPE,
                                     universal_newlines=True)
        except OSError as err:
            # Without a shell, a missing binary surfaces here instead of as
            # a "not found" line on the child's stderr.
            print('***' + str(err) + '^')
            sys.exit(2)

        time.sleep(interval)
        while time.time() <= killtime:
            time.sleep(1)

        if child.poll() is not None:
            logging.warning("WARNING: db_stress completed before kill\n")
        else:
            child.kill()
            print("KILLED \n")
            time.sleep(1)  # time to stabilize after a kill

        run_had_errors = _report_stderr(child.stderr)
        child.stderr.close()
        child.wait()  # reap the child so killed runs do not pile up
        if run_had_errors:
            sys.exit(2)

# Script entry point: pass the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)