clickhouse-test 21.8 KB
Newer Older
1 2 3 4 5 6 7 8 9
#!/usr/bin/env python
import sys
import os
import os.path
import re

from argparse import ArgumentParser
from argparse import FileType
from pprint import pprint
10
import shlex
11
import subprocess
12 13 14 15
from subprocess import check_call
from subprocess import Popen
from subprocess import PIPE
from subprocess import CalledProcessError
16 17 18
from datetime import datetime
from time import sleep
from errno import ESRCH
19
import termcolor
20
from random import random
P
proller 已提交
21
import commands
P
proller 已提交
22
import multiprocessing
P
proller 已提交
23
from contextlib import closing
24

25

A
alesapin 已提交
26
# Substrings of a test's stderr that mark a failure as retryable:
# `need_retry` checks stderr against this list and the runner re-runs the
# test (with a backoff) instead of reporting a failure straight away.
MESSAGES_TO_RETRY = [
    "DB::Exception: ZooKeeper session has been expired",
    "Coordination::Exception: Connection loss",
]

31

32 33 34 35 36 37 38 39
def remove_control_characters(s):
    """
    Decode numeric character references in `s` and strip control characters.

    https://github.com/html5lib/html5lib-python/issues/96#issuecomment-43438438

    Decimal (`&#65;`) and hexadecimal (`&#x41;`) references below 0x10000 are
    replaced by the character they denote; larger ones are left untouched.
    Afterwards the characters \\x00-\\x08, \\x0b, \\x0e-\\x1f and \\x7f are
    removed, which also drops control characters produced by the decoding step.

    Returns the cleaned string.
    """
    # `unichr` exists only on Python 2; `chr` is its Python 3 equivalent.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    def str_to_int(s, default, base=10):
        code_point = int(s, base)
        if code_point < 0x10000:
            return to_char(code_point)
        return default

    s = re.sub(r"&#(\d+);?", lambda c: str_to_int(c.group(1), c.group(0)), s)
    s = re.sub(r"&#[xX]([0-9a-fA-F]+);?", lambda c: str_to_int(c.group(1), c.group(0), base=16), s)
    s = re.sub(r"[\x00-\x08\x0b\x0e-\x1f\x7f]", "", s)
    return s

A
alesapin 已提交
45 46
def run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file):
    """
    Run one test case through the shell and collect its output.

    `.sql` cases are piped into `args.client_with_database`; any other case
    file is executed directly. stdout/stderr are redirected to `stdout_file`
    and `stderr_file`. The function waits for at most `args.timeout` seconds;
    it does NOT kill the process on timeout - the caller detects this through
    `proc.returncode is None`.

    After the wait, every occurrence of `args.database` in both output files
    is rewritten in place to `default` (via `sed -i`).

    Returns `(proc, stdout, stderr)`, where the last two are the contents of
    the output files decoded as UTF-8 with undecodable bytes replaced (empty
    when the file does not exist).
    """
    if ext == '.sql':
        command = "{0} --send_logs_level={1} --testmode --multiquery < {2} > {3} 2> {4}".format(args.client_with_database, server_logs_level, case_file, stdout_file, stderr_file)
    else:
        command = "{} > {} 2> {}".format(case_file, stdout_file, stderr_file)

    proc = Popen(command, shell=True)
    start_time = datetime.now()
    while (datetime.now() - start_time).total_seconds() < args.timeout and proc.poll() is None:
        sleep(0.01)

    # Pass sed an argv list rather than a shell string, so that paths
    # containing spaces or shell metacharacters are handled correctly.
    substitution = 's/{test_db}/default/g'.format(test_db=args.database)
    for output_file in (stdout_file, stderr_file):
        subprocess.call(['sed', '-i', substitution, output_file])

    def read_decoded(path):
        data = b''
        if os.path.exists(path):
            # Read bytes and decode explicitly: works the same on Python 2 and 3
            # and closes the file deterministically.
            with open(path, 'rb') as f:
                data = f.read()
        return data.decode('utf-8', 'replace')

    stdout = read_decoded(stdout_file)
    stderr = read_decoded(stderr_file)

    return proc, stdout, stderr

def need_retry(stderr):
    """Return True if `stderr` contains any of the MESSAGES_TO_RETRY substrings."""
    for retryable_message in MESSAGES_TO_RETRY:
        if retryable_message in stderr:
            return True
    return False

69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
def get_processlist(client_cmd):
    """
    Return the output of `SHOW PROCESSLIST FORMAT Vertical` run through `client_cmd`.

    `client_cmd` is a shell command line for the client. Any failure to run
    the query (non-zero exit status, client not found, ...) is treated as
    "server seems dead" and yields an empty string.
    """
    try:
        return subprocess.check_output("{} --query 'SHOW PROCESSLIST FORMAT Vertical'".format(client_cmd), shell=True)
    except Exception:
        # Deliberately best-effort, but do not swallow KeyboardInterrupt /
        # SystemExit the way a bare `except:` would.
        return ""  # server seems dead

def get_stacktraces(server_pid):
    """
    Attach gdb to `server_pid` and return the backtraces of all its threads.

    gdb's own stderr is discarded. If the command cannot be run or exits with
    a non-zero status, a string describing the error is returned instead of
    raising.
    """
    gdb_command_template = "gdb -q -ex 'set pagination off' -ex 'backtrace' -ex 'thread apply all backtrace' -ex 'detach' -ex 'quit' --pid {} 2>/dev/null"
    try:
        gdb_output = subprocess.check_output(gdb_command_template.format(server_pid), shell=True)
    except Exception as ex:
        return "Error occured while receiving stack traces {}".format(str(ex))
    return gdb_output

def get_server_pid(server_tcp_port):
    """
    Return the PID of the process listening on `*:<server_tcp_port>`, or None.

    The PID is taken from the second column of the first matching `lsof`
    line. None is returned when nothing matches, when `lsof`/`grep` fail, or
    when the output cannot be parsed.
    """
    cmd = "lsof -i tcp:{port} | grep '*:{port}'".format(port=server_tcp_port)
    try:
        output = subprocess.check_output(cmd, shell=True)
    except Exception:
        return None  # nothing is listening (grep matched nothing) or lsof is unavailable

    # lsof pads its columns with a variable number of spaces, so split on
    # runs of whitespace: `split(' ')` would produce empty fields and the PID
    # would not be found at index 1.
    columns = output.split()
    if len(columns) < 2:
        return None  # server dead
    try:
        return int(columns[1])
    except ValueError:
        return None

P
proller 已提交
94 95 96 97 98 99 100 101 102
def colored(text, args, color=None, on_color=None, attrs=None):
    """
    Return `text` wrapped by `termcolor.colored` when colour output is wanted.

    Colour is applied when stdout is a terminal or `args.force_color` is set;
    otherwise `text` is returned unchanged.
    """
    use_color = sys.stdout.isatty() or args.force_color
    if not use_color:
        return text
    return termcolor.colored(text, color, on_color, attrs)

# Set to True by run_tests_array when `--stop` is given and a failed test's
# stderr shows a connection error; makes the test and suite loops stop early.
SERVER_DIED = False
# Status passed to sys.exit() at the end of main(); set to 1 when tests fail
# or the hung-query check finds queries still running.
exit_code = 0

103

P
proller 已提交
104 105 106
#def run_tests_array(all_tests, suite, suite_dir, suite_tmp_dir, run_total):
def run_tests_array(all_tests_with_params):
    """
    Run one chunk of a suite's test cases, printing a verdict per test and a summary.

    Takes a single sequence so that it can be passed to `Pool.map`:
    `(all_tests, suite, suite_dir, suite_tmp_dir, run_total)` where
    `all_tests` is the list of case file names to run, `suite` the suite name
    used in messages, `suite_dir` the directory holding the case and
    `.reference`/`.disabled` files, `suite_tmp_dir` the directory receiving
    `.stdout`/`.stderr` files, and `run_total` the number of chunks (stdout is
    flushed after each test name only when it equals 1).

    Reads the module-level `args` and `server_logs_level`. Sets the global
    `exit_code` to 1 when any test failed, and `SERVER_DIED` to True when
    `--stop` is given and a failure looks like a lost server connection.
    Stops early after 20 consecutive failures or once `SERVER_DIED` is set.
    """
    global exit_code
    global SERVER_DIED

    all_tests, suite, suite_dir, suite_tmp_dir, run_total = all_tests_with_params

    OP_SQUARE_BRACKET = colored("[", args, attrs=['bold'])
    CL_SQUARE_BRACKET = colored("]", args, attrs=['bold'])

    MSG_FAIL = OP_SQUARE_BRACKET + colored(" FAIL ", args, "red", attrs=['bold']) + CL_SQUARE_BRACKET
    MSG_UNKNOWN = OP_SQUARE_BRACKET + colored(" UNKNOWN ", args, "yellow", attrs=['bold']) + CL_SQUARE_BRACKET
    MSG_OK = OP_SQUARE_BRACKET + colored(" OK ", args, "green", attrs=['bold']) + CL_SQUARE_BRACKET
    MSG_SKIPPED = OP_SQUARE_BRACKET + colored(" SKIPPED ", args, "cyan", attrs=['bold']) + CL_SQUARE_BRACKET

    passed_total = 0
    skipped_total = 0
    failures_total = 0
    failures_chain = 0  # consecutive failures; reset by every passing test

    if all_tests:
        print("\nRunning {} {} tests.".format(len(all_tests), suite) + "\n")

    for case in all_tests:
        if SERVER_DIED:
            break

        case_file = os.path.join(suite_dir, case)
        (name, ext) = os.path.splitext(case)

        try:
            sys.stdout.write("{0:72}".format(name + ": "))
            if run_total == 1:
                sys.stdout.flush()

            # Decide whether the test is skipped, and why.
            skip_reason = None
            if args.skip and any(s in name for s in args.skip):
                skip_reason = "skip"
            elif not args.zookeeper and 'zookeeper' in name:
                skip_reason = "no zookeeper"
            elif not args.shard and ('shard' in name
                    or 'distributed' in name
                    or 'global' in name):
                skip_reason = "no shard"
            elif not args.no_long and 'long' in name:
                # `--no-long` is a store_false flag: `args.no_long` is False
                # exactly when long tests were disabled on the command line.
                skip_reason = "no long"
            else:
                disabled_file = os.path.join(suite_dir, name) + '.disabled'
                if os.path.exists(disabled_file) and not args.disabled:
                    with open(disabled_file, 'r') as f:
                        skip_reason = f.read()

            if skip_reason is not None:
                print(MSG_SKIPPED + " - " + skip_reason)
                # Disabled tests are counted as skipped as well, so the
                # summary matches the SKIPPED lines printed above.
                skipped_total += 1
                continue

            if args.testname:
                clickhouse_proc = Popen(shlex.split(args.client_with_database), stdin=PIPE, stdout=PIPE, stderr=PIPE)
                clickhouse_proc.communicate("SELECT 'Running test {suite}/{case} from pid={pid}';".format(pid = os.getpid(), case = case, suite = suite))

            reference_file = os.path.join(suite_dir, name) + '.reference'
            stdout_file = os.path.join(suite_tmp_dir, name) + '.stdout'
            stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr'

            proc, stdout, stderr = run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file)

            # Retry transient failures (at most 6 times), backing off BEFORE
            # each retry so that no time is wasted after a successful one.
            counter = 1
            while proc.returncode not in (None, 0) and need_retry(stderr) and counter <= 6:
                sleep(2**counter)
                proc, stdout, stderr = run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file)
                counter += 1

            if proc.returncode is None:
                # Timed out (on the first run or on a retry): the process is
                # still running and has to be killed here.
                try:
                    proc.kill()
                except OSError as e:
                    if e.errno != ESRCH:
                        raise

                failures_total += 1
                print("{0} - Timeout!".format(MSG_FAIL))
            elif proc.returncode != 0:
                failures_total += 1
                failures_chain += 1
                print("{0} - return code {1}".format(MSG_FAIL, proc.returncode))

                if stderr:
                    print(stderr.encode('utf-8'))

                if args.stop and ('Connection refused' in stderr or 'Attempt to read after eof' in stderr) and not 'Received exception from server' in stderr:
                    SERVER_DIED = True

            elif stderr:
                failures_total += 1
                failures_chain += 1
                print("{0} - having stderror:\n{1}".format(MSG_FAIL, stderr.encode('utf-8')))
            elif 'Exception' in stdout:
                failures_total += 1
                failures_chain += 1
                print("{0} - having exception:\n{1}".format(MSG_FAIL, stdout.encode('utf-8')))
            elif not os.path.isfile(reference_file):
                print("{0} - no reference file".format(MSG_UNKNOWN))
            else:
                result_is_different = subprocess.call(['diff', '-q', reference_file, stdout_file], stdout = PIPE)

                if result_is_different:
                    diff = Popen(['diff', '--unified', reference_file, stdout_file], stdout = PIPE).communicate()[0]
                    failures_total += 1
                    print("{0} - result differs with reference:\n{1}".format(MSG_FAIL, diff))
                else:
                    passed_total += 1
                    failures_chain = 0
                    print(MSG_OK)
                    # Keep output files only for tests that did not pass.
                    if os.path.exists(stdout_file):
                        os.remove(stdout_file)
                    if os.path.exists(stderr_file):
                        os.remove(stderr_file)
        except KeyboardInterrupt:
            print(colored("Break tests execution", args, "red"))
            raise
        except Exception:
            # An error in the runner itself counts as a failure of this test,
            # but must not abort the remaining tests.
            import traceback
            exc_type, exc_value, tb = sys.exc_info()
            failures_total += 1
            print("{0} - Test internal error: {1}\n{2}\n{3}".format(MSG_FAIL, exc_type.__name__, exc_value, "\n".join(traceback.format_tb(tb, 10))))

        if failures_chain >= 20:
            break

    if failures_total > 0:
        print(colored("\nHaving {failures_total} errors! {passed_total} tests passed. {skipped_total} tests skipped.".format(passed_total = passed_total, skipped_total = skipped_total, failures_total = failures_total), args, "red", attrs=["bold"]))
        exit_code = 1
    else:
        print(colored("\n{passed_total} tests passed. {skipped_total} tests skipped.".format(passed_total = passed_total, skipped_total = skipped_total), args, "green", attrs=["bold"]))
A
alesapin 已提交
243

P
proller 已提交
244 245 246 247 248 249
server_logs_level = "warning"


def _run_tests_array_in_worker(all_tests_with_params):
    """
    `multiprocessing.Pool` entry point: run one chunk and report the worker's status.

    `run_tests_array` records its outcome in the `exit_code` and `SERVER_DIED`
    module globals, but a pool worker is a separate process, so the parent
    never sees those assignments. Returning the values lets `main` fold them
    back into its own globals.
    """
    run_tests_array(all_tests_with_params)
    return exit_code, SERVER_DIED


def main(args):
    """
    Run every selected test suite found under `args.queries` and exit.

    Exports the CLICKHOUSE_* environment defaults used by shell tests,
    auto-detects `args.zookeeper` / `args.shard` from the server config when
    they were not given, creates the test databases, then runs the suites
    (directories named `<number>_<suite>`) in order. Each suite's tests are
    split into `run_total` chunks according to `--parallel N/TOTAL` and
    `--jobs`. Optionally checks for hung queries afterwards.

    Never returns: terminates the process via `sys.exit(exit_code)`.
    """
    global SERVER_DIED
    global exit_code
    global server_logs_level

    def is_data_present():
        clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
        (stdout, stderr) = clickhouse_proc.communicate("EXISTS TABLE test.hits")
        if clickhouse_proc.returncode != 0:
            raise CalledProcessError(clickhouse_proc.returncode, args.client, stderr)

        return stdout.startswith('1')

    def create_database(database):
        clickhouse_proc_create = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
        clickhouse_proc_create.communicate("CREATE DATABASE IF NOT EXISTS " + database)

    def is_test_from_dir(suite_dir, case):
        case_file = os.path.join(suite_dir, case)
        (name, ext) = os.path.splitext(case)
        return os.path.isfile(case_file) and (ext == '.sql' or ext == '.sh' or ext == '.py')

    def numbered_name_key(item, sign=1):
        """Sort key for `<number>_<rest>` names; `sign=-1` reverses the numeric order."""
        if args.order == 'random':
            return random()

        # Always return tuples: mixing bare ints with tuples sorts the
        # sentinels FIRST on Python 2 and raises TypeError on Python 3.
        prefix, separator, suffix = item.partition('_')
        if not separator:
            return (99998, item)
        try:
            return (sign * int(prefix), suffix)
        except ValueError:
            return (99997, item)

    base_dir = os.path.abspath(args.queries)
    tmp_dir = os.path.abspath(args.tmp)

    # Keep same default values as in queries/shell_config.sh
    os.environ.setdefault("CLICKHOUSE_BINARY", args.binary)
    os.environ.setdefault("CLICKHOUSE_CONFIG", args.configserver)
    if args.configclient:
        os.environ.setdefault("CLICKHOUSE_CONFIG_CLIENT", args.configclient)
    os.environ.setdefault("CLICKHOUSE_TMP", tmp_dir)
    os.environ.setdefault("CLICKHOUSE_DATABASE", args.database)

    # Force to print server warnings in stderr
    # Shell scripts could change logging level
    os.environ.setdefault("CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL", server_logs_level)

    if args.zookeeper is None:
        # Enable zookeeper tests only if the server config has a non-empty `zookeeper` key.
        _, out = commands.getstatusoutput(args.extract_from_config +" --try --config " + args.configserver + ' --key zookeeper | grep . | wc -l')
        try:
            args.zookeeper = int(out) > 0
        except ValueError:
            args.zookeeper = False

    if args.shard is None:
        # Enable shard tests only if the server listens on 127.0.0.2 or on a wildcard IPv6 address.
        _, out = commands.getstatusoutput(args.extract_from_config + " --try --config " + args.configserver + ' --key listen_host | grep -E "127.0.0.2|::"')
        args.shard = bool(out)

    create_database(args.database)
    if args.database != "test":
        create_database("test")

    for suite in sorted(os.listdir(base_dir), key=numbered_name_key):
        if SERVER_DIED:
            break

        suite_dir = os.path.join(base_dir, suite)
        suite_re_obj = re.search('^[0-9]+_(.*)$', suite)
        if not suite_re_obj: #skip .gitignore and so on
            continue

        suite_tmp_dir = os.path.join(tmp_dir, suite)
        if not os.path.exists(suite_tmp_dir):
            os.makedirs(suite_tmp_dir)

        suite = suite_re_obj.group(1)
        if not os.path.isdir(suite_dir):
            continue

        if 'stateful' in suite and not args.no_stateful and not is_data_present():
            print("Won't run stateful tests because test data wasn't loaded.")
            continue
        if 'stateless' in suite and args.no_stateless:
            print("Won't run stateless tests because they were manually disabled.")
            continue
        if 'stateful' in suite and args.no_stateful:
            print("Won't run stateful tests because they were manually disabled.")
            continue

        # Reverse sort order: we want run newest test first.
        # And not reverse subtests
        order_sign = 1 if args.order == 'asc' else -1
        all_tests = [
            case for case in os.listdir(suite_dir)
            if is_test_from_dir(suite_dir, case) and (not args.test or re.search(args.test, case))
        ]
        all_tests.sort(key=lambda case: numbered_name_key(case, order_sign))

        # `--parallel N/TOTAL`: this invocation runs chunk N out of TOTAL.
        run_n, run_total = args.parallel.split('/')
        run_n = int(float(run_n))
        run_total = int(float(run_total))
        tests_n = len(all_tests)
        if run_total > tests_n:
            run_total = tests_n
        if run_n > run_total:
            continue

        jobs = args.jobs
        if jobs > tests_n:
            jobs = tests_n
        if jobs > run_total:
            run_total = jobs

        # Pure integer arithmetic for the chunk bounds: `tests_n / run_total`
        # truncates on Python 2 when both are ints and silently drops tests.
        all_tests_array = []
        for n in range(1, 1 + run_total):
            start = tests_n * (n - 1) // run_total
            end = tests_n * n // run_total
            all_tests_array.append([all_tests[start : end], suite, suite_dir, suite_tmp_dir, run_total])

        if jobs > 1:
            with closing(multiprocessing.Pool(processes=jobs)) as pool:
                worker_results = pool.map(_run_tests_array_in_worker, all_tests_array)
                pool.terminate()
            # Globals assigned inside worker processes are not visible here,
            # so merge the reported status into this process explicitly.
            for worker_exit_code, worker_server_died in worker_results:
                if worker_exit_code:
                    exit_code = worker_exit_code
                if worker_server_died:
                    SERVER_DIED = True
        else:
            run_tests_array(all_tests_array[run_n - 1])

    if args.hung_check:
        processlist = get_processlist(args.client_with_database)
        if processlist:
            server_pid = get_server_pid(os.getenv("CLICKHOUSE_PORT_TCP", '9000'))
            print(colored("\nFound hung queries in processlist:", args, "red", attrs=["bold"]))
            print(processlist)
            if server_pid:
                print("\nStacktraces of all threads:")
                print(get_stacktraces(server_pid))
            exit_code = 1
        else:
            print(colored("\nNo queries hung.", args, "green", attrs=["bold"]))

    sys.exit(exit_code)

408

A
alesapin 已提交
409
def find_binary(name):
    """
    Return True if `name` refers to an executable file.

    `name` is tried as given (absolute or relative path), then inside every
    directory listed in the PATH environment variable, and finally inside
    `/usr/bin` in case that directory is not on PATH.
    """
    def is_executable_file(path):
        # `os.access(..., X_OK)` alone is also true for searchable
        # directories, which are not runnable binaries.
        return os.path.isfile(path) and os.access(path, os.X_OK)

    if is_executable_file(name):
        return True

    # PATH may be unset; treat that as an empty search path instead of crashing.
    search_dirs = os.environ.get("PATH", "").split(os.pathsep)
    # maybe it wasn't in PATH
    search_dirs.append('/usr/bin')
    return any(is_executable_file(os.path.join(directory, name)) for directory in search_dirs)
419 420

# Script entry point: parse the command line, fill in defaults that depend on
# the environment (queries dir, tmp dir, client command, database name,
# extract-from-config command) and hand over to main().
# NOTE: `args` is deliberately a module-level name - run_tests_array reads it.
if __name__ == '__main__':
    parser=ArgumentParser(description='ClickHouse functional tests')
    parser.add_argument('-q', '--queries', help='Path to queries dir')
    parser.add_argument('--tmp', help='Path to tmp dir')
    parser.add_argument('-b', '--binary', default='clickhouse', help='Path to clickhouse binary or name of binary in PATH')
    parser.add_argument('-c', '--client', help='Client program')
    parser.add_argument('--extract_from_config', help='extract-from-config program')
    parser.add_argument('--configclient', help='Client config (if you use not default ports)')
    parser.add_argument('--configserver', default= '/etc/clickhouse-server/config.xml', help='Preprocessed server config')
    parser.add_argument('-o', '--output', help='Output xUnit compliant test report directory')
    parser.add_argument('-t', '--timeout', type=int, default=600, help='Timeout for each test case in seconds')
    parser.add_argument('test', nargs='?', help='Optional test case name regex')
    parser.add_argument('-d', '--disabled', action='store_true', default=False, help='Also run disabled tests')
    parser.add_argument('--stop', action='store_true', default=None, dest='stop', help='Stop on network errors')
    parser.add_argument('--order', default='desc', help='Run order (asc, desc, random)')
    parser.add_argument('--testname', action='store_true', default=None, dest='testname', help='Make query with test name before test run')
    parser.add_argument('--hung-check', action='store_true', default=False)
    parser.add_argument('--force-color', action='store_true', default=False)
    parser.add_argument('--database', help='Database for tests (random name test_XXXXXX by default)')
    parser.add_argument('--parallel', default='1/1', help='One parallel test run number/total')
    parser.add_argument('-j', '--jobs', default=1, help='Run all tests in parallel', type=int) # default=multiprocessing.cpu_count()

    parser.add_argument('--no-stateless', action='store_true', help='Disable all stateless tests')
    parser.add_argument('--no-stateful', action='store_true', help='Disable all stateful tests')
    parser.add_argument('--skip', nargs='+', help="Skip these tests")
    # store_false: `args.no_long` is True by default and becomes False when
    # `--no-long` is passed, i.e. a False value means "skip long tests".
    parser.add_argument('--no-long', action='store_false', dest='no_long', help='Do not run long tests')
    group=parser.add_mutually_exclusive_group(required=False)
    group.add_argument('--zookeeper', action='store_true', default=None, dest='zookeeper', help='Run zookeeper related tests')
    group.add_argument('--no-zookeeper', action='store_false', default=None, dest='zookeeper', help='Do not run zookeeper related tests')
    group=parser.add_mutually_exclusive_group(required=False)
    group.add_argument('--shard', action='store_true', default=None, dest='shard', help='Run sharding related tests (required to clickhouse-server listen 127.0.0.2 127.0.0.3)')
    group.add_argument('--no-shard', action='store_false', default=None, dest='shard', help='Do not run shard related tests')

    args = parser.parse_args()

    # Locate the queries directory: ./queries first, then the install locations.
    if args.queries is None:
        if os.path.isdir('queries'):
            args.queries = 'queries'
        else:
            for candidate in ('/usr/local/share/clickhouse-test/queries', '/usr/share/clickhouse-test/queries'):
                if os.path.isdir(candidate):
                    args.queries = candidate
                    break
            # Install locations may be read-only, so do not default tmp to them.
            if args.tmp is None:
                args.tmp = '/tmp/clickhouse-test'
    if args.queries is None:
        sys.stderr.write("Failed to detect path to the queries directory. Please specify it with '--queries' option.\n")
        sys.exit(1)
    if args.tmp is None:
        args.tmp = args.queries

    if args.client is None:
        # Prefer a dedicated `<binary>-client` executable, then `<binary> client`.
        if find_binary(args.binary + '-client'):
            args.client = args.binary + '-client'
        elif find_binary(args.binary):
            args.client = args.binary + ' client'
        else:
            print("No 'clickhouse' binary found in PATH")
            parser.print_help()
            sys.exit(1)

        if args.configclient:
            args.client += ' --config-file=' + args.configclient
        if os.getenv("CLICKHOUSE_HOST"):
            args.client += ' --host=' + os.getenv("CLICKHOUSE_HOST")
        if os.getenv("CLICKHOUSE_PORT_TCP"):
            args.client += ' --port=' + os.getenv("CLICKHOUSE_PORT_TCP")
        if os.getenv("CLICKHOUSE_DATABASE"):
            args.client += ' --database=' + os.getenv("CLICKHOUSE_DATABASE")

    args.client_with_database = args.client
    if not args.database:
        def random_str(length=6):
            import random
            import string
            alphabet = string.ascii_lowercase + string.digits
            return ''.join(random.choice(alphabet) for _ in range(length))
        args.database = 'test_{suffix}'.format(suffix=random_str())
    args.client_with_database += ' --database=' + args.database

    if args.extract_from_config is None:
        if os.access(args.binary + '-extract-from-config', os.X_OK):
            args.extract_from_config = args.binary + '-extract-from-config'
        else:
            args.extract_from_config = args.binary + ' extract-from-config'

    main(args)