未验证 提交 b1d8ff45 编写于 作者: Z Zhou Wei 提交者: GitHub

Run unit tests in parallel on a single GPU on Linux/Windows (#29523)

上级 10a8f3e5
......@@ -994,8 +994,14 @@ function card_test() {
if (( $cardnumber > $CUDA_DEVICE_COUNT )); then
cardnumber=$CUDA_DEVICE_COUNT
fi
if (( $# > 2 )); then
parallel_job=$3
else
parallel_job=1
fi
else
cardnumber=$CUDA_DEVICE_COUNT
parallel_job=1
fi
if [[ "$testcases" == "" ]]; then
......@@ -1005,6 +1011,9 @@ function card_test() {
trap 'caught_error' CHLD
tmpfile_rand=`date +%s%N`
NUM_PROC=$[CUDA_DEVICE_COUNT/$cardnumber]
echo "****************************************************************"
echo "***These unittests run $parallel_job job each time with $cardnumber GPU***"
echo "****************************************************************"
for (( i = 0; i < $NUM_PROC; i++ )); do
# CUDA_VISIBLE_DEVICES http://acceleware.com/blog/cudavisibledevices-masking-gpus
# ctest -I https://cmake.org/cmake/help/v3.0/manual/ctest.1.html?highlight=ctest
......@@ -1019,15 +1028,15 @@ function card_test() {
tmpfile=$tmp_dir/$tmpfile_rand"_"$i
if [ ${TESTING_DEBUG_MODE:-OFF} == "ON" ] ; then
if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then
(ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" -V --timeout 120 | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
(ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" -V --timeout 120 -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
else
(env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 -V | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
(env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 -V -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
fi
else
if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then
(ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 --output-on-failure | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
(ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
else
(env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 --output-on-failure | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
(env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) &
fi
fi
done
......@@ -1076,13 +1085,23 @@ set -x
set +x
EXIT_CODE=0;
test_cases=$(ctest -N -V) # get all test cases
single_card_tests_eight_parallel='^job$' # cases list which would run 8 job each time with single GPU
single_card_tests_tetrad_parallel='^job$' # cases list which would run 4 job each time with single GPU
single_card_tests_non_parallel_1='^job$' # cases list which would run 1 job each time with single GPU
single_card_tests_non_parallel_2='^job$' # cases list which would run 1 job each time with single GPU
single_card_tests='^job$' # all cases list which would take one graph card
exclusive_tests='' # cases list which would be run exclusively
single_card_tests='' # cases list which would take one graph card
multiple_card_tests='' # cases list which would take multiple GPUs, most cases would be two GPUs
is_exclusive='' # indicate whether the case is exclusive type
is_multicard='' # indicate whether the case is multiple GPUs type
is_nightly='' # indicate whether the case will only run at night
get_quickly_disable_ut||disable_ut_quickly='' # indicate whether the case was in quickly disable list
get_quickly_disable_ut||disable_ut_quickly='' # indicate whether the case was in quickly disable list
UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d')
output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}")
eight_parallel_job=$(echo $output | cut -d ";" -f 1)
tetrad_parallel_jog=$(echo $output | cut -d ";" -f 2)
non_parallel_job=$(echo $output | cut -d ";" -f 3)
while read -r line; do
if [[ "$line" == "" ]]; then
continue
......@@ -1136,20 +1155,16 @@ set +x
multiple_card_tests="$multiple_card_tests|^$testcase$"
fi
else
if [[ "${#single_card_tests}" -gt 10000 ]];then
if [[ "$single_card_tests_1" == "" ]]; then
single_card_tests_1="^$testcase$"
else
single_card_tests_1="$single_card_tests_1|^$testcase$"
fi
continue
fi
if [[ "$single_card_tests" == "" ]]; then
single_card_tests="^$testcase$"
if [[ $(echo $eight_parallel_job | grep $testcase) != "" ]]; then
single_card_tests_eight_parallel="$single_card_tests_eight_parallel|^$testcase$"
elif [[ $(echo $tetrad_parallel_jog | grep $testcase) != "" ]]; then
single_card_tests_tetrad_parallel="$single_card_tests_tetrad_parallel|^$testcase$"
elif [[ "${#single_card_tests_non_parallel_1}" -gt 10000 ]];then
single_card_tests_non_parallel_2="$single_card_tests_non_parallel_2|^$testcase$"
else
single_card_tests="$single_card_tests|^$testcase$"
single_card_tests_non_parallel_1="$single_card_tests_non_parallel_1|^$testcase$"
fi
single_card_tests="$single_card_tests|^$testcase$"
fi
is_exclusive=''
is_multicard=''
......@@ -1158,10 +1173,12 @@ set +x
testcase=''
done <<< "$test_cases";
card_test "$single_card_tests" 1 # run cases with single GPU
card_test "$single_card_tests_1" 1 # run cases with single GPU
card_test "$multiple_card_tests" 2 # run cases with two GPUs
card_test "$exclusive_tests" # run cases exclusively, in this cases would be run with 4/8 GPUs
card_test "$single_card_tests_eight_parallel" 1 8 # run cases 8 job each time with single GPU
card_test "$single_card_tests_tetrad_parallel" 1 4 # run cases 4 job each time with single GPU
card_test "$single_card_tests_non_parallel_1" 1 # run cases 1 job each time with single GPU
card_test "$single_card_tests_non_parallel_2" 1 # run cases 1 job each time with single GPU
card_test "$multiple_card_tests" 2 # run cases with two GPUs
card_test "$exclusive_tests" # run cases exclusively, in this cases would be run with 4/8 GPUs
collect_failed_tests
rm -f $tmp_dir/*
exec_times=0
......@@ -1189,9 +1206,7 @@ set +x
for line in ${retry_unittests[@]} ;
do
one_card_tests=$single_card_tests'|'$single_card_tests_1
read tmp_one_tmp <<< "$( echo $one_card_tests | grep -oEi $line )"
read tmp_one_tmp <<< "$( echo $single_card_tests | grep -oEi $line )"
read tmp_mul_tmp <<< "$( echo $multiple_card_tests | grep -oEi $line )"
read exclusive_tmp <<< "$( echo $exclusive_tests | grep -oEi $line )"
......
......@@ -54,6 +54,8 @@ API_FILES=("CMakeLists.txt"
"python/paddle/fluid/tests/unittests/white_list/no_grad_set_white_list.py"
"tools/wlist.json"
"paddle/scripts/paddle_build.bat"
"tools/windows/run_unittests.sh"
"tools/parallel_UT_rule.py"
)
approval_line=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000`
......@@ -140,8 +142,11 @@ for API_FILE in ${API_FILES[*]}; do
elif [ "${API_FILE}" == "python/paddle/distributed/__init__.py" ]; then
echo_line="You must have (guru4elephant,raindrops2sea) approval for ${API_FILE} changes "
check_approval 1 35550832 38231817
elif [ "${API_FILE}" == "paddle/scripts/paddle_build.bat" ]; then
echo_line="You must have one RD (zhouwei25 (Recommend), luotao1) approval for ${API_FILE} changes, which manages all Paddle CI task on Windows.\n"
elif [ "${API_FILE}" == "paddle/scripts/paddle_build.bat" ] || [ "${API_FILE}" == "tools/windows/run_unittests.sh" ]; then
echo_line="You must have one RD (zhouwei25 (Recommend), luotao1) approval for ${API_FILE} changes, which manages the Paddle CI task on Windows.\n"
check_approval 1 52485244 6836917
elif [ "${API_FILE}" == "tools/parallel_UT_rule.py" ]; then
echo_line="You must have one RD (zhouwei25 (Recommend), luotao1) approval for ${API_FILE} changes, which manages the rule of running unittest with a same GPU. If the unittest failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, you can remove it from ${API_FILE}.\n"
check_approval 1 52485244 6836917
elif [ "${API_FILE}" == "python/paddle/fluid/parallel_executor.py" ]; then
echo_line="You must have one RD (Xreki,luotao1,zhhsplendid) approval for ${API_FILE}, which manages the underlying code for PaddlePaddle.\n"
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
# *=======These unit tests don't occupy GPU memory; they just run as CPU unit tests=======* #
# They run 8 jobs at a time. If one fails due to insufficient GPU memory or
# CUBLAS_STATUS_ALLOC_FAILED, just remove it from this list.
CPU_PARALLEL_JOB = [
'test_row_conv',
'test_nce',
'test_conv3d_mkldnn_op',
'dim_test',
'test_limit_gpu_memory',
'profiler_test',
'test_dequantize_mkldnn_op',
'test_elementwise_add_bf16_mkldnn_op',
'test_rpn_target_assign_op',
'test_hash_op',
'reader_blocking_queue_test',
'jit_kernel_test',
'test_tdm_child_op',
'test_simplify_with_basic_ops_pass',
'test_sequence_last_step',
'test_sequence_first_step',
'test_seq_concat_fc_fuse_pass',
'test_fc_gru_fuse_pass',
'test_dataset_imdb',
'dlpack_tensor_test',
'check_reduce_rank_test',
'var_type_traits_test',
'var_type_inference_test',
'to_string_test',
'threadpool_test',
'test_version',
'test_var_info',
'test_var_conv_2d',
'test_unique_name',
'test_transpose_int8_mkldnn_op',
'test_transpose_bf16_mkldnn_op',
'test_trainable',
'test_teacher_student_sigmoid_loss_op',
'test_tdm_sampler_op',
'test_switch',
'test_static_shape_inferrence_for_shape_tensor',
'test_squared_mat_sub_fuse_pass',
'test_sequence_scatter_op',
'test_sequence_scatter_op',
'test_scaled_dot_product_attention',
'test_rnn_memory_helper_op',
'test_requantize_mkldnn_op',
'test_quantize_transpiler',
'test_quantize_mkldnn_op',
'test_py_reader_sample_generator',
'test_parallel_executor_seresnext_with_reduce_cpu',
'test_parallel_executor_seresnext_with_fuse_all_reduce_cpu',
'test_parallel_executor_seresnext_base_cpu',
'test_parallel_dygraph_sync_batch_norm',
'test_origin_info',
'test_multiclass_nms_op',
'test_monitor',
'test_mkldnn_conv_bias_fuse_pass',
'test_mkldnn_conv_activation_fuse_pass',
'test_matrix_nms_op',
'test_ir_graph',
'test_inference_api',
'test_infer_shape',
'test_infer_no_need_buffer_slots',
'test_imperative_numpy_bridge',
'test_imperative_decorator',
'test_hooks',
'test_gpu_package_without_gpu_device',
'test_global_var_getter_setter',
'test_get_set_flags',
'test_fusion_repeated_fc_relu_op',
'test_fused_emb_seq_pool_op',
'test_fleet_base_4',
'test_fc_lstm_fuse_pass',
'test_executor_feed_non_tensor',
'test_executor_check_feed',
'test_executor_and_use_program_cache',
'test_exception',
'test_error_clip',
'test_embedding_eltwise_layernorm_fuse_pass',
'test_dyn_rnn',
'test_dpsgd_op',
'test_distributed_reader',
'test_directory_migration',
'test_dataset_wmt',
'test_dataset_uci_housing',
'test_dataset_cifar',
'test_data_feeder',
'test_cudnn_placement_pass',
'test_conv3d_layer',
'test_concat_bf16_mkldnn_op',
'test_common_infer_shape_functions',
'test_check_import_scipy',
'test_calc_gradient',
'test_bipartite_match_op',
'test_attention_lstm_op',
'test_array_read_write_op',
'stringprintf_test',
'stringpiece_test',
'selected_rows_test',
'scope_test',
'reader_test',
'prune_test',
'op_tester',
'eigen_test',
'device_worker_test',
'cudnn_helper_test',
'cudnn_desc_test',
'tuple_test',
'timer_test',
'test_zeros_op',
'test_while_op',
'test_utils',
'test_static_analysis',
'test_split_and_merge_lod_tensor_op',
'test_spawn_and_init_parallel_env',
'test_slice_var',
'test_similarity_focus_op',
'test_shuffle_batch_op',
'test_shrink_rnn_memory',
'test_set_bool_attr',
'test_sequence_topk_avg_pooling',
'test_selected_rows',
'test_scope',
'test_sampling_id_op',
'test_runtime_and_compiletime_exception',
'test_run_fluid_by_module_or_command_line',
'test_retinanet_detection_output',
'test_require_version',
'test_repeated_fc_relu_fuse_pass',
'test_registry',
'test_recurrent_op',
'test_recommender_system',
'test_query_op',
'test_quantization_mkldnn_pass',
'test_quant2_int8_mkldnn_pass',
'test_pybind_interface',
'test_py_reader_error_msg',
'test_prune',
'test_protobuf',
'test_progressbar',
'test_program_to_string',
'test_program_code',
'test_program',
'test_precision_recall_op',
'test_positive_negative_pair_op',
'test_parallel_executor_run_load_infer_program',
'test_op_version',
'test_op_support_gpu',
'test_ones_op',
'test_npair_loss_op',
'test_nn_functional_embedding_static',
'test_name_scope',
'test_multiprocess_dataloader_iterable_dataset_split',
'test_multi_gru_mkldnn_op',
'test_mul_int8_mkldnn_op',
'test_mkldnn_scale_matmul_fuse_pass',
'test_mkldnn_op_inplace',
'test_mkldnn_matmul_transpose_reshape_fuse_pass',
'test_mkldnn_inplace_fuse_pass',
'test_mkldnn_cpu_bfloat16_pass',
'test_mine_hard_examples_op',
'test_memory_usage',
'test_matmul_mkldnn_op',
'test_matmul_bf16_mkldnn_op',
'test_math_op_patch',
'test_match_matrix_tensor_op',
'test_lookup_table_dequant_op',
'test_logging_utils',
'test_logger',
'test_lod_tensor_array_ops',
'test_lod_tensor_array',
'test_lod_rank_table',
'test_lod_array_length_op',
'test_locality_aware_nms_op',
'test_load_vars_shape_check',
'test_load_op_xpu',
'test_load_op',
'test_linear_chain_crf_op',
'test_layer_norm_mkldnn_op',
'test_layer_norm_bf16_mkldnn_op',
'test_lambv2_op',
'test_ir_skip_layernorm_pass',
'test_io_save_load',
'test_input_spec',
'test_inference_model_io',
'test_imperative_base',
'test_image_classification_layer',
'test_image',
'test_ifelse_basic',
'test_hsigmoid_op',
'test_generator',
'test_generate_proposal_labels_op',
'test_generate_mask_labels_op',
'test_gast_with_compatibility',
'test_fusion_squared_mat_sub_op',
'test_fusion_seqconv_eltadd_relu_op',
'test_fusion_lstm_op',
'test_fusion_gru_op',
'test_fusion_gru_int8_mkldnn_op',
'test_fusion_gru_bf16_mkldnn_op',
'test_fused_embedding_fc_lstm_op',
'test_function_spec',
'test_full_op',
'test_framework_debug_str',
'test_fp16_utils',
'test_fleet_rolemaker_4',
'test_flags_use_mkldnn',
'test_filter_by_instag_op',
'test_fetch_var',
'test_fetch_handler',
'test_feed_fetch_method',
'test_fc_mkldnn_op',
'test_fc_lstm_fuse_pass',
'test_fc_gru_fuse_pass',
'test_fc_bf16_mkldnn_op',
'test_entry_attr',
'test_entry_attr2',
'test_elementwise_mul_bf16_mkldnn_op',
'test_eager_deletion_recurrent_op',
'test_eager_deletion_padding_rnn',
'test_eager_deletion_mnist',
'test_eager_deletion_dynamic_rnn_base',
'test_eager_deletion_conditional_block',
'test_dynrnn_static_input',
'test_dynrnn_gradient_check',
'test_dygraph_mode_of_unittest',
'test_download',
'test_distributions',
'test_detection_map_op',
'test_desc_clone',
'test_depthwise_conv_mkldnn_pass',
'test_deprecated_memory_optimize_interfaces',
'test_default_scope_funcs',
'test_default_dtype',
'test_datasets',
'test_dataset_voc',
'test_dataset_movielens',
'test_dataset_imikolov',
'test_dataset_conll05',
'test_data_generator',
'test_data',
'test_cyclic_cifar_dataset',
'test_crypto',
'test_create_op_doc_string',
'test_create_global_var',
'test_conv3d_transpose_layer',
'test_conv2d_transpose_layer',
'test_conv2d_mkldnn_op',
'test_conv2d_layer',
'test_conv2d_int8_mkldnn_op',
'test_conv2d_bf16_mkldnn_op',
'test_const_value',
'test_conditional_block',
'test_concat_int8_mkldnn_op',
'test_compat',
'test_collective_base',
'test_collective_api_base',
'test_chunk_eval_op',
'test_broadcast_to_op',
'test_broadcast_shape',
'test_broadcast_error',
'test_bpr_loss_op',
'test_beam_search_op',
'test_batch_sampler',
'test_basic_rnn_name',
'test_aligned_allocator',
'scatter_test',
'save_load_combine_op_test',
'program_desc_test',
'lodtensor_printer_test',
'lod_tensor_test',
'gather_test',
'gather_op_test',
'fused_broadcast_op_test',
'exception_holder_test',
'decorator_test',
'ddim_test',
'data_layout_transform_test',
'cpu_vec_test',
'cow_ptr_tests',
'conditional_block_op_test',
'bfloat16_test',
'assign_op_test',
'unroll_array_ops_test',
'test_seqpool_cvm_concat_fuse_pass',
'test_seqpool_concat_fuse_pass',
'test_reshape_bf16_op',
'test_repeated_fc_relu_fuse_pass',
'test_py_reader_return_list',
'test_py_reader_lod_level_share',
'test_protobuf_descs',
'test_paddle_inference_api',
'test_operator_desc',
'test_operator',
'test_mkldnn_matmul_op_output_fuse_pass',
'test_mkldnn_inplace_pass',
'test_mkldnn_conv_concat_relu_mkldnn_fuse_pass',
'test_layer',
'test_is_test_pass',
'test_graph_pattern_detector',
'test_fusion_seqpool_cvm_concat_op',
'test_fusion_seqpool_concat_op',
'test_fusion_seqexpand_concat_fc_op',
'test_fusion_gru_mkldnn_op',
'test_fleet_util',
'test_fleet_runtime',
'test_fleet_rolemaker_init',
'test_flags_mkldnn_ops_on_off',
'test_dataset_download',
'test_dataloader_unkeep_order',
'test_dataloader_keep_order',
'test_dataloader_dataset',
'test_crf_decoding_op',
'test_create_parameter',
'test_context_manager',
'test_analyzer',
'tensor_test',
'split_test',
'save_load_op_test',
'place_test',
'op_version_registry_test',
'op_proto_maker_test',
'op_kernel_type_test',
'mask_util_test',
'inlined_vector_test',
'infer_io_utils_tester',
'errors_test',
'enforce_test',
'dropout_op_test',
'data_type_test',
'cpu_info_test',
'cpu_helper_test',
'beam_search_decode_op_test',
'auto_growth_best_fit_allocator_test',
'test_skip_layernorm_fuse_pass',
'test_multihead_matmul_fuse_pass',
'test_fc_elementwise_layernorm_fuse_pass',
'version_test',
'variable_test',
'test_scale_matmul_fuse_pass',
'test_reshape_transpose_matmul_mkldnn_fuse_pass',
'test_multi_gru_seq_fuse_pass',
'test_multi_gru_fuse_pass',
'test_mkldnn_placement_pass',
'test_mkldnn_op_nhwc',
'test_matmul_transpose_reshape_fuse_pass',
'test_fs',
'test_fleet',
'test_cpu_quantize_squash_pass',
'test_cpu_quantize_placement_pass',
'test_cpu_quantize_pass',
'test_cpu_bfloat16_placement_pass',
'test_cpu_bfloat16_pass',
'test_conv_elementwise_add_mkldnn_fuse_pass',
'test_conv_concat_relu_mkldnn_fuse_pass',
'test_conv_bias_mkldnn_fuse_pass',
'test_conv_batch_norm_mkldnn_fuse_pass',
'test_conv_activation_mkldnn_fuse_pass',
'test_benchmark',
'test_batch_norm_act_fuse_pass',
'selected_rows_functor_test',
'save_load_util_test',
'pass_test',
'operator_test',
'operator_exception_test',
'op_debug_string_test',
'op_compatible_info_test',
'op_call_stack_test',
'node_test',
'no_need_buffer_vars_inference_test',
'nccl_context_test',
'math_function_test',
'init_test',
'graph_to_program_pass_test',
'graph_test',
'graph_helper_test',
'float16_test',
'dist_multi_trainer_test',
'cipher_utils_test',
'broadcast_op_test',
'aes_cipher_test',
]
# These unit tests run 4 jobs at a time. If one fails due to insufficient GPU memory or
# CUBLAS_STATUS_ALLOC_FAILED, just remove it from this list.
TETRAD_PARALLEL_JOB = [
'system_allocator_test',
'buffered_allocator_test',
'test_tensor_to_numpy',
'test_imperative_framework',
'test_naive_best_fit_gpu_memory_limit',
'test_auto_growth_gpu_memory_limit',
'test_imperative_using_non_zero_gpu',
'cuda_helper_test',
'retry_allocator_test',
'allocator_facade_frac_flags_test',
]
def main():
    """Print the ctest regex groups used to schedule single-GPU unit tests.

    Reads a newline-separated list of test names from ``sys.argv[1]`` and
    prints exactly one line of the form::

        <eight_parallel>;<tetrad_parallel>;<non_parallel_1>,<non_parallel_2>

    Every group is a ``|``-joined list of anchored names (``^name$``) and
    always starts with the placeholder ``^job$`` so that no group is ever an
    empty regex.

    * Names listed in ``CPU_PARALLEL_JOB`` go to the first group.
    * Names listed in ``TETRAD_PARALLEL_JOB`` go to the second group.
    * Everything else goes to ``non_parallel_1`` until that string reaches
      10000 characters, and to ``non_parallel_2`` afterwards.

    A missing argument is treated as an empty test list. Surrounding
    whitespace (for example a trailing ``\\r`` from Windows line endings) is
    stripped from each name, and blank lines are ignored.
    """
    # Sets give constant-time membership checks instead of scanning the
    # module-level lists once per test name.
    eight_parallel_names = frozenset(CPU_PARALLEL_JOB)
    tetrad_parallel_names = frozenset(TETRAD_PARALLEL_JOB)

    eight_parallel_job = '^job$'
    tetrad_parallel_job = '^job$'
    non_parallel_job_1 = '^job$'
    non_parallel_job_2 = '^job$'

    raw_test_cases = sys.argv[1] if len(sys.argv) > 1 else ''
    for line in raw_test_cases.split("\n"):
        test_name = line.strip()
        if not test_name:
            # A blank entry would only add a useless '^$' alternative.
            continue
        anchored = '|^' + test_name + '$'
        if test_name in eight_parallel_names:
            eight_parallel_job += anchored
        elif test_name in tetrad_parallel_names:
            tetrad_parallel_job += anchored
        elif len(non_parallel_job_1) < 10000:
            non_parallel_job_1 += anchored
        else:
            # Overflow bucket once the first regex has reached 10000 chars.
            non_parallel_job_2 += anchored

    non_parallel_job = ",".join([non_parallel_job_1, non_parallel_job_2])
    print("{};{};{}".format(eight_parallel_job, tetrad_parallel_job,
                            non_parallel_job))
if __name__ == '__main__':
main()
......@@ -204,4 +204,64 @@ long_time_test="^best_fit_allocator_test$|\
export FLAGS_call_stack_level=2
export FLAGS_fraction_of_gpu_memory_to_use=0.92
export CUDA_VISIBLE_DEVICES=0
ctest -E "$disable_ut_quickly|$diable_wingpu_test|$long_time_test" -LE "${nightly_label}" --output-on-failure -C Release --repeat until-pass:4 after-timeout:4
# Build the list of test names: keep the text after ': ' on every line of
# `ctest -N`, drop the empty lines, then drop the last remaining line
# (presumably ctest's trailing summary line -- TODO confirm).
UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d')
# Same pipeline again, only counted, for the log message below.
num=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d' | wc -l)
echo "Windows 1 card TestCases count is $num"
# parallel_UT_rule.py prints one line "<eight>;<tetrad>;<non_1>,<non_2>",
# where every field is a ctest regex.
output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}")
# Field 1: regex handed to run_unittest with 8 parallel jobs.
eight_parallel_job=$(echo $output | cut -d ";" -f 1)
# Field 2: regex handed to run_unittest with 4 parallel jobs. The variable is
# spelled "jog" (sic); the run_unittest call further down uses the same name.
tetrad_parallel_jog=$(echo $output | cut -d ";" -f 2)
# Field 3: two ','-separated regexes that are run with the default job count.
non_parallel_job=$(echo $output | cut -d ";" -f 3)
non_parallel_job_1=$(echo $non_parallel_job | cut -d "," -f 1)
non_parallel_job_2=$(echo $non_parallel_job | cut -d "," -f 2)
# Accumulator filled by collect_failed_tests.
failed_test_lists=''
# Scratch directory that receives one ctest log per run_unittest call.
tmp_dir=`mktemp -d`
# Scan every ctest log stored in $tmp_dir and append the entries that follow
# the line 'The following tests FAILED:' to failed_test_lists.
# Globals:
#   tmp_dir            (read)    directory holding the ctest logs
#   failed_test_lists  (written) newline-separated failed-test entries
#   failuretest        (written) entries extracted from the last log scanned
# Arguments: none
function collect_failed_tests() {
    local log
    # Glob instead of parsing `ls`, and quote every path, so a log directory
    # whose path contains whitespace is still scanned.
    for log in "$tmp_dir"/*; do
        # An empty directory leaves the pattern unexpanded; skip non-files.
        [ -f "$log" ] || continue
        # Testing grep inside `if` keeps a "no match" result from aborting
        # the script when errexit is enabled.
        if grep -q 'The following tests FAILED:' "$log"; then
            # Keep what follows the marker, strip the marker text itself and
            # drop the empty lines that leaves behind.
            failuretest=$(grep -A 10000 'The following tests FAILED:' "$log" \
                | sed 's/The following tests FAILED://g' | sed '/^$/d')
            failed_test_lists="${failed_test_lists}"$'\n'"${failuretest}"
        else
            failuretest=''
        fi
    done
}
# Run the ctest cases matching a regex and tee the output into a fresh log
# file under $tmp_dir so that collect_failed_tests can inspect it later.
# Arguments:
#   $1 - regex passed to `ctest -R`
#   $2 - value passed to `ctest -j` (optional, defaults to 1)
# Globals read:
#   tmp_dir, disable_ut_quickly, diable_wingpu_test, long_time_test,
#   nightly_label
# Side effects:
#   exports CUDA_VISIBLE_DEVICES=0 and creates one log file in $tmp_dir
# Returns:
#   1 if the log file cannot be created, otherwise the status of `wait`
function run_unittest() {
    local test_case=$1
    local parallel_job=${2:-1}
    local tmpfile
    echo "************************************************************************"
    echo "********These unittests run $parallel_job job each time with 1 GPU**********"
    echo "************************************************************************"
    export CUDA_VISIBLE_DEVICES=0
    # mktemp guarantees a name that is not already taken; a plain $RANDOM
    # suffix could repeat and overwrite the log of an earlier group.
    tmpfile=$(mktemp "$tmp_dir/ut_log_XXXXXX") || return 1
    (ctest -R "$test_case" -E "$disable_ut_quickly|$diable_wingpu_test|$long_time_test" -LE "${nightly_label}" --output-on-failure -C Release -j "$parallel_job" --repeat until-pass:4 after-timeout:4 | tee "$tmpfile" ) &
    wait;
}
# errexit is switched off while the four groups run and their logs are
# scanned, and switched back on afterwards.
set +e
# 8 jobs at a time for the first group, 4 for the second; the two remaining
# groups use run_unittest's default job count.
run_unittest $eight_parallel_job 8
run_unittest $tetrad_parallel_jog 4
run_unittest $non_parallel_job_1
run_unittest $non_parallel_job_2
# Fill failed_test_lists from the logs written under $tmp_dir.
collect_failed_tests
set -e
# Remove the per-run logs (the directory itself is left in place).
rm -f $tmp_dir/*
# Print a summary and exit with status 8 when any failed test was collected.
if [[ "$failed_test_lists" != "" ]]; then
echo "========================================"
echo "Summary Failed Tests... "
echo "========================================"
echo "The following tests FAILED: "
echo "${failed_test_lists}"
exit 8
fi
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册