未验证 提交 e7df8893 编写于 作者: A Alexander Kuzmenkov 提交者: GitHub

Merge pull request #22300 from ClickHouse/trying_parallel_func_tests

Trying parallel func tests
......@@ -74,12 +74,17 @@ function run_tests()
ADDITIONAL_OPTIONS+=('--order=random')
ADDITIONAL_OPTIONS+=('--skip')
ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip')
ADDITIONAL_OPTIONS+=('--jobs')
ADDITIONAL_OPTIONS+=('4')
# Note that flaky check must be run in parallel, but for now we run
# everything in parallel except DatabaseReplicated. See below.
fi
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--replicated-database')
else
# Too many tests fail for DatabaseReplicated in parallel. All other
# configurations are OK.
ADDITIONAL_OPTIONS+=('--jobs')
ADDITIONAL_OPTIONS+=('8')
fi
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
......
......@@ -38,7 +38,21 @@ void StorageSystemClusters::fillData(MutableColumns & res_columns, const Context
for (const auto & name_and_database : databases)
{
if (const auto * replicated = typeid_cast<const DatabaseReplicated *>(name_and_database.second.get()))
writeCluster(res_columns, {name_and_database.first, replicated->getCluster()});
{
// A quick fix for stateless tests with DatabaseReplicated. Its ZK
// node can be destroyed at any time. If another test lists
// system.clusters to get client command line suggestions, it will
// get an error when trying to get the info about DB from ZK.
// Just ignore these inaccessible databases. A good example of a
// failing test is `01526_client_start_and_exit`.
try {
writeCluster(res_columns, {name_and_database.first, replicated->getCluster()});
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
}
......
......@@ -305,6 +305,9 @@ def run_tests_array(all_tests_with_params):
failures_total = 0
failures = 0
failures_chain = 0
start_time = datetime.now()
is_concurrent = multiprocessing.current_process().name != "MainProcess"
client_options = get_additional_client_options(args)
......@@ -315,7 +318,7 @@ def run_tests_array(all_tests_with_params):
return ''
if all_tests:
print("\nRunning {} {} tests.".format(len(all_tests), suite) + "\n")
print(f"\nRunning {len(all_tests)} {suite} tests ({multiprocessing.current_process().name}).\n")
for case in all_tests:
if SERVER_DIED:
......@@ -330,7 +333,6 @@ def run_tests_array(all_tests_with_params):
try:
status = ''
is_concurrent = multiprocessing.current_process().name != "MainProcess"
if not is_concurrent:
sys.stdout.flush()
sys.stdout.write("{0:72}".format(name + ": "))
......@@ -499,12 +501,18 @@ def run_tests_array(all_tests_with_params):
failures_total = failures_total + failures
if failures_total > 0:
print(colored("\nHaving {failures_total} errors! {passed_total} tests passed. {skipped_total} tests skipped.".format(
passed_total = passed_total, skipped_total = skipped_total, failures_total = failures_total), args, "red", attrs=["bold"]))
print(colored(f"\nHaving {failures_total} errors! {passed_total} tests passed."
f" {skipped_total} tests skipped. {(datetime.now() - start_time).total_seconds():.2f} s elapsed"
f' ({multiprocessing.current_process().name}).',
args, "red", attrs=["bold"]))
exit_code = 1
else:
print(colored("\n{passed_total} tests passed. {skipped_total} tests skipped.".format(
passed_total = passed_total, skipped_total = skipped_total), args, "green", attrs=["bold"]))
print(colored(f"\n{passed_total} tests passed. {skipped_total} tests skipped."
f" {(datetime.now() - start_time).total_seconds():.2f} s elapsed"
f' ({multiprocessing.current_process().name}).',
args, "green", attrs=["bold"]))
sys.stdout.flush()
server_logs_level = "warning"
......@@ -799,7 +807,8 @@ def main(args):
if jobs > run_total:
run_total = jobs
batch_size = len(parallel_tests) // jobs
# Create two batches per process for more uniform execution time.
batch_size = max(1, len(parallel_tests) // (jobs * 2))
parallel_tests_array = []
for i in range(0, len(parallel_tests), batch_size):
parallel_tests_array.append((parallel_tests[i:i+batch_size], suite, suite_dir, suite_tmp_dir))
......
......@@ -62,7 +62,11 @@ OPTIMIZE TABLE four_rows_per_granule FINAL;
SELECT COUNT(*) FROM four_rows_per_granule;
SELECT distinct(marks) from system.parts WHERE table = 'four_rows_per_granule' and database=currentDatabase() and active=1;
-- We expect zero marks here, so we might get zero rows if all the parts were
-- deleted already. This can happen in parallel runs where there may be a long delay
-- between queries. So we must write the query in such a way that it always returns
-- zero rows if OK.
SELECT distinct(marks) d from system.parts WHERE table = 'four_rows_per_granule' and database=currentDatabase() and active=1 having d > 0;
INSERT INTO four_rows_per_granule (p, k, v1, v2, Sign, Version) VALUES ('2018-05-15', 1, 1000, 2000, 1, 1), ('2018-05-16', 2, 3000, 4000, 1, 1), ('2018-05-17', 3, 5000, 6000, 1, 1), ('2018-05-18', 4, 7000, 8000, 1, 1);
......@@ -120,6 +124,10 @@ OPTIMIZE TABLE six_rows_per_granule FINAL;
SELECT COUNT(*) FROM six_rows_per_granule;
SELECT distinct(marks) from system.parts WHERE table = 'six_rows_per_granule' and database=currentDatabase() and active=1;
-- We expect zero marks here, so we might get zero rows if all the parts were
-- deleted already. This can happen in parallel runs where there may be a long delay
-- between queries. So we must write the query in such a way that it always returns
-- zero rows if OK.
SELECT distinct(marks) d from system.parts WHERE table = 'six_rows_per_granule' and database=currentDatabase() and active=1 having d > 0;
DROP TABLE IF EXISTS six_rows_per_granule;
......@@ -2,7 +2,7 @@ drop table if exists ttl;
create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d) ttl d + interval 1 day;
system stop ttl merges;
system stop ttl merges ttl;
insert into ttl values (toDateTime('2000-10-10 00:00:00'), 1), (toDateTime('2000-10-10 00:00:00'), 2)
insert into ttl values (toDateTime('2100-10-10 00:00:00'), 3), (toDateTime('2100-10-10 00:00:00'), 4);
......@@ -11,7 +11,7 @@ select sleep(1) format Null; -- wait if very fast merge happen
optimize table ttl partition 10 final;
select * from ttl order by d, a;
system start ttl merges;
system start ttl merges ttl;
optimize table ttl partition 10 final;
select * from ttl order by d, a;
......
......@@ -12,4 +12,3 @@ Check if another query is passed
Modify max_concurrent_queries back to 1
Check if another query with less marks to read is throttled
yes
finished long_running_query default select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null
......@@ -18,9 +18,11 @@ settings index_granularity = 1, max_concurrent_queries = 1, min_marks_to_honor_m
insert into simple select number, number + 100 from numbers(1000);
"
query_id="long_running_query-$CLICKHOUSE_DATABASE"
echo "Spin up a long running query"
${CLICKHOUSE_CLIENT} --query "select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null" --query_id "long_running_query" > /dev/null 2>&1 &
wait_for_query_to_start 'long_running_query'
${CLICKHOUSE_CLIENT} --query "select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null" --query_id "$query_id" > /dev/null 2>&1 &
wait_for_query_to_start "$query_id"
# query which reads marks >= min_marks_to_honor_max_concurrent_queries is throttled
echo "Check if another query with some marks to read is throttled"
......@@ -61,7 +63,7 @@ CODE=$?
[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1;
echo "yes"
${CLICKHOUSE_CLIENT} --query "KILL QUERY WHERE query_id = 'long_running_query' SYNC"
${CLICKHOUSE_CLIENT} --query "KILL QUERY WHERE query_id = '$query_id' SYNC FORMAT Null"
wait
${CLICKHOUSE_CLIENT} --multiline --multiquery --query "
......
......@@ -641,6 +641,7 @@
"01542_dictionary_load_exception_race",
"01545_system_errors", // looks at the difference of values in system.errors
"01560_optimize_on_insert_zookeeper",
"01563_distributed_query_finish", // looks at system.errors which is global
"01575_disable_detach_table_of_dictionary",
"01593_concurrent_alter_mutations_kill",
"01593_concurrent_alter_mutations_kill_many_replicas",
......@@ -667,6 +668,7 @@
"01702_system_query_log", // Runs many global system queries
"01715_background_checker_blather_zookeeper",
"01721_engine_file_truncate_on_insert", // It's ok to execute in parallel but not several instances of the same test.
"01722_long_brotli_http_compression_json_format", // it is broken in some unimaginable way with the genius error "cannot write to ofstream", not sure how to debug this
"01747_alter_partition_key_enum_zookeeper",
"01748_dictionary_table_dot", // creates database
"01760_polygon_dictionaries",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册