From 49448b4a00fe22df61e8a45fce91b669e1fc70ff Mon Sep 17 00:00:00 2001
From: alesapin
Date: Wed, 13 Mar 2019 19:47:02 +0300
Subject: [PATCH] Retry on Exception connection loss

---
Reviewer notes (this section is ignored by `git am`):

* The retry loop now increments `counter` after each attempt. Before this
  change the counter stayed at 1, so the back-off was a constant 2 seconds
  and the `counter > 6` guard could never fire: a persistent
  "Connection loss" error made the runner retry forever.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of the patch. The indentation of the context lines inside main() is
  assumed; use `git apply --ignore-whitespace` if it does not match the tree.
* The commit id and the post-image blob hash in the `index` line were not
  regenerated for the amended content.

 dbms/tests/clickhouse-test | 48 +++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test
index 1cf4882ddc..e69594cdc7 100755
--- a/dbms/tests/clickhouse-test
+++ b/dbms/tests/clickhouse-test
@@ -30,6 +30,10 @@ MSG_UNKNOWN = OP_SQUARE_BRACKET + colored(" UNKNOWN ", "yellow", attrs=['bold'])
 MSG_OK = OP_SQUARE_BRACKET + colored(" OK ", "green", attrs=['bold']) + CL_SQUARE_BRACKET
 MSG_SKIPPED = OP_SQUARE_BRACKET + colored(" SKIPPED ", "cyan", attrs=['bold']) + CL_SQUARE_BRACKET
 
+MESSAGES_TO_RETRY = [
+    "Coordination::Exception: Connection loss",
+]
+
 
 def remove_control_characters(s):
     """
@@ -44,6 +48,28 @@ def remove_control_characters(s):
     s = re.sub(ur"[\x00-\x08\x0b\x0e-\x1f\x7f]", "", s)
     return s
 
+
+def run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file):
+    if ext == '.sql':
+        command = "{0} --send_logs_level={1} --testmode --multiquery < {2} > {3} 2> {4}".format(args.client, server_logs_level, case_file, stdout_file, stderr_file)
+    else:
+        command = "{} > {} 2> {}".format(case_file, stdout_file, stderr_file)
+
+    proc = Popen(command, shell = True)
+    start_time = datetime.now()
+    while (datetime.now() - start_time).total_seconds() < args.timeout and proc.poll() is None:
+        sleep(0.01)
+
+    stdout = open(stdout_file, 'r').read() if os.path.exists(stdout_file) else ''
+    stdout = unicode(stdout, errors='replace', encoding='utf-8')
+    stderr = open(stderr_file, 'r').read() if os.path.exists(stderr_file) else ''
+    stderr = unicode(stderr, errors='replace', encoding='utf-8')
+
+    return proc, stdout, stderr
+
+def need_retry(stderr):
+    return any(msg in stderr for msg in MESSAGES_TO_RETRY)
+
 def main(args):
 
     SERVER_DIED = False
@@ -201,16 +227,7 @@ def main(args):
                                 stdout_file = os.path.join(suite_tmp_dir, name) + '.stdout'
                                 stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr'
 
-                                if ext == '.sql':
-                                    command = "{0} --send_logs_level={1} --testmode --multiquery < {2} > {3} 2> {4}".format(args.client, server_logs_level, case_file, stdout_file, stderr_file)
-                                else:
-                                    command = "{} > {} 2> {}".format(case_file, stdout_file, stderr_file)
-
-                                proc = Popen(command, shell = True)
-                                start_time = datetime.now()
-                                while (datetime.now() - start_time).total_seconds() < args.timeout and proc.poll() is None:
-                                    sleep(0.01)
-
+                                proc, stdout, stderr = run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file)
                                 if proc.returncode is None:
                                     try:
                                         proc.kill()
@@ -224,10 +241,13 @@ def main(args):
                                     failures += 1
                                     print("{0} - Timeout!".format(MSG_FAIL))
                                 else:
-                                    stdout = open(stdout_file, 'r').read() if os.path.exists(stdout_file) else ''
-                                    stdout = unicode(stdout, errors='replace', encoding='utf-8')
-                                    stderr = open(stderr_file, 'r').read() if os.path.exists(stderr_file) else ''
-                                    stderr = unicode(stderr, errors='replace', encoding='utf-8')
+                                    counter = 1
+                                    while proc.returncode != 0 and need_retry(stderr):
+                                        proc, stdout, stderr = run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file)
+                                        sleep(2**counter)
+                                        counter += 1
+                                        if counter > 6:
+                                            break
 
                                     if proc.returncode != 0:
                                         failure = et.Element("failure", attrib = {"message": "return code {}".format(proc.returncode)})
-- 
GitLab