diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index fc4de4565b8e49bce96af6912a98a1dbd2ecf179..97729fbd3a9e426f0eb708086f607572bded115d 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -994,8 +994,14 @@ function card_test() { if (( $cardnumber > $CUDA_DEVICE_COUNT )); then cardnumber=$CUDA_DEVICE_COUNT fi + if (( $# > 2 )); then + parallel_job=$3 + else + parallel_job=1 + fi else cardnumber=$CUDA_DEVICE_COUNT + parallel_job=1 fi if [[ "$testcases" == "" ]]; then @@ -1005,6 +1011,9 @@ function card_test() { trap 'caught_error' CHLD tmpfile_rand=`date +%s%N` NUM_PROC=$[CUDA_DEVICE_COUNT/$cardnumber] + echo "****************************************************************" + echo "***These unittests run $parallel_job job each time with $cardnumber GPU***" + echo "****************************************************************" for (( i = 0; i < $NUM_PROC; i++ )); do # CUDA_VISIBLE_DEVICES http://acceleware.com/blog/cudavisibledevices-masking-gpus # ctest -I https://cmake.org/cmake/help/v3.0/manual/ctest.1.html?highlight=ctest @@ -1019,15 +1028,15 @@ function card_test() { tmpfile=$tmp_dir/$tmpfile_rand"_"$i if [ ${TESTING_DEBUG_MODE:-OFF} == "ON" ] ; then if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then - (ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" -V --timeout 120 | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + (ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" -V --timeout 120 -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & else - (env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 -V | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + (env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 -V -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & fi else if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then - (ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 --output-on-failure | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + (ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & else - (env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 --output-on-failure | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + (env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & fi fi done @@ -1076,13 +1085,23 @@ set -x set +x EXIT_CODE=0; test_cases=$(ctest -N -V) # get all test cases + single_card_tests_eight_parallel='^job$' # cases list which would run 8 job each time with single GPU + single_card_tests_tetrad_parallel='^job$' # cases list which would run 4 job each time with single GPU + single_card_tests_non_parallel_1='^job$' # cases list which would run 1 job each time with single GPU + single_card_tests_non_parallel_2='^job$' # cases list which would run 1 job each time with single GPU + single_card_tests='^job$' # all cases list which would take one graph card exclusive_tests='' # cases list which would be run exclusively - single_card_tests='' # cases list which would take one graph card multiple_card_tests='' # cases list which would take multiple GPUs, most cases would be two GPUs is_exclusive='' # indicate whether the case is exclusive type is_multicard='' # indicate whether the case is multiple GPUs type is_nightly='' # indicate whether the case will only run at night - get_quickly_disable_ut||disable_ut_quickly='' # indicate whether the case was in quickly disable list + get_quickly_disable_ut||disable_ut_quickly='' # indicate whether the case was in quickly disable list + + UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d') + output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}") + eight_parallel_job=$(echo $output | cut -d ";" -f 1) + tetrad_parallel_jog=$(echo $output | cut -d ";" -f 2) + non_parallel_job=$(echo $output | cut -d ";" -f 3) while read -r line; do if [[ "$line" == "" ]]; then continue @@ -1136,20 +1155,16 @@ set +x multiple_card_tests="$multiple_card_tests|^$testcase$" fi else - if [[ "${#single_card_tests}" -gt 10000 ]];then - if [[ "$single_card_tests_1" == "" ]]; then - single_card_tests_1="^$testcase$" - else - single_card_tests_1="$single_card_tests_1|^$testcase$" - fi - continue - fi - - if [[ "$single_card_tests" == "" ]]; then - single_card_tests="^$testcase$" + if [[ $(echo $eight_parallel_job | grep $testcase) != "" ]]; then + single_card_tests_eight_parallel="$single_card_tests_eight_parallel|^$testcase$" + elif [[ $(echo $tetrad_parallel_jog | grep $testcase) != "" ]]; then + single_card_tests_tetrad_parallel="$single_card_tests_tetrad_parallel|^$testcase$" + elif [[ "${#single_card_tests_non_parallel_1}" -gt 10000 ]];then + single_card_tests_non_parallel_2="$single_card_tests_non_parallel_2|^$testcase$" else - single_card_tests="$single_card_tests|^$testcase$" + single_card_tests_non_parallel_1="$single_card_tests_non_parallel_1|^$testcase$" fi + single_card_tests="$single_card_tests|^$testcase$" fi is_exclusive='' is_multicard='' @@ -1158,10 +1173,12 @@ set +x testcase='' done <<< "$test_cases"; - card_test "$single_card_tests" 1 # run cases with single GPU - card_test "$single_card_tests_1" 1 # run cases with single GPU - card_test "$multiple_card_tests" 2 # run cases with two GPUs - card_test "$exclusive_tests" # run cases exclusively, in this cases would be run with 4/8 GPUs + card_test "$single_card_tests_eight_parallel" 1 8 # run cases 8 job each time with single GPU + card_test "$single_card_tests_tetrad_parallel" 1 4 # run cases 4 job each time with single GPU + card_test "$single_card_tests_non_parallel_1" 1 # run cases 1 job each time with single GPU + card_test "$single_card_tests_non_parallel_2" 1 # run cases 1 job each time with single GPU + card_test "$multiple_card_tests" 2 # run cases with two GPUs + card_test "$exclusive_tests" # run cases exclusively, in this cases would be run with 4/8 GPUs collect_failed_tests rm -f $tmp_dir/* exec_times=0 @@ -1189,9 +1206,7 @@ set +x for line in ${retry_unittests[@]} ; do - one_card_tests=$single_card_tests'|'$single_card_tests_1 - - read tmp_one_tmp <<< "$( echo $one_card_tests | grep -oEi $line )" + read tmp_one_tmp <<< "$( echo $single_card_tests | grep -oEi $line )" read tmp_mul_tmp <<< "$( echo $multiple_card_tests | grep -oEi $line )" read exclusive_tmp <<< "$( echo $exclusive_tests | grep -oEi $line )" diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh index 93c48c2acf6bfe275d0b1ead4d7acfd553ddc149..fec5d63dc43f3e22a3e7e35776f20ed9fa0a232a 100644 --- a/tools/check_file_diff_approvals.sh +++ b/tools/check_file_diff_approvals.sh @@ -54,6 +54,8 @@ API_FILES=("CMakeLists.txt" "python/paddle/fluid/tests/unittests/white_list/no_grad_set_white_list.py" "tools/wlist.json" "paddle/scripts/paddle_build.bat" + "tools/windows/run_unittests.sh" + "tools/parallel_UT_rule.py" ) approval_line=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000` @@ -140,8 +142,11 @@ for API_FILE in ${API_FILES[*]}; do elif [ "${API_FILE}" == "python/paddle/distributed/__init__.py" ]; then echo_line="You must have (guru4elephant,raindrops2sea) approval for ${API_FILE} changes " check_approval 1 35550832 38231817 - elif [ "${API_FILE}" == "paddle/scripts/paddle_build.bat" ]; then - echo_line="You must have one RD (zhouwei25 (Recommend), luotao1) approval for ${API_FILE} changes, which manages all Paddle CI task on Windows.\n" + elif [ "${API_FILE}" == "paddle/scripts/paddle_build.bat" ] || [ "${API_FILE}" == "tools/windows/run_unittests.sh" ]; then + echo_line="You must have one RD (zhouwei25 (Recommend), luotao1) approval for ${API_FILE} changes, which manages the Paddle CI task on Windows.\n" + check_approval 1 52485244 6836917 + elif [ "${API_FILE}" == "tools/parallel_UT_rule.py" ]; then + echo_line="You must have one RD (zhouwei25 (Recommend), luotao1) approval for ${API_FILE} changes, which manages the rule of running unittest with a same GPU. If the unittest failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, you can remove it from ${API_FILE}.\n" check_approval 1 52485244 6836917 elif [ "${API_FILE}" == "python/paddle/fluid/parallel_executor.py" ]; then echo_line="You must have one RD (Xreki,luotao1,zhhsplendid) approval for ${API_FILE}, which manages the underlying code for PaddlePaddle.\n" diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py new file mode 100644 index 0000000000000000000000000000000000000000..49efc8b6776855fff0487b9fc0a672859cce8530 --- /dev/null +++ b/tools/parallel_UT_rule.py @@ -0,0 +1,444 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import os + +# *=======These unittest doesn't occupy GPU memory, just run as CPU unittest=======* # +# It run 8 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, +# just remove it from this list. +CPU_PARALLEL_JOB = [ + 'test_row_conv', + 'test_nce', + 'test_conv3d_mkldnn_op', + 'dim_test', + 'test_limit_gpu_memory', + 'profiler_test', + 'test_dequantize_mkldnn_op', + 'test_elementwise_add_bf16_mkldnn_op', + 'test_rpn_target_assign_op', + 'test_hash_op', + 'reader_blocking_queue_test', + 'jit_kernel_test', + 'test_tdm_child_op', + 'test_simplify_with_basic_ops_pass', + 'test_sequence_last_step', + 'test_sequence_first_step', + 'test_seq_concat_fc_fuse_pass', + 'test_fc_gru_fuse_pass', + 'test_dataset_imdb', + 'dlpack_tensor_test', + 'check_reduce_rank_test', + 'var_type_traits_test', + 'var_type_inference_test', + 'to_string_test', + 'threadpool_test', + 'test_version', + 'test_var_info', + 'test_var_conv_2d', + 'test_unique_name', + 'test_transpose_int8_mkldnn_op', + 'test_transpose_bf16_mkldnn_op', + 'test_trainable', + 'test_teacher_student_sigmoid_loss_op', + 'test_tdm_sampler_op', + 'test_switch', + 'test_static_shape_inferrence_for_shape_tensor', + 'test_squared_mat_sub_fuse_pass', + 'test_sequence_scatter_op', + 'test_sequence_scatter_op', + 'test_scaled_dot_product_attention', + 'test_rnn_memory_helper_op', + 'test_requantize_mkldnn_op', + 'test_quantize_transpiler', + 'test_quantize_mkldnn_op', + 'test_py_reader_sample_generator', + 'test_parallel_executor_seresnext_with_reduce_cpu', + 'test_parallel_executor_seresnext_with_fuse_all_reduce_cpu', + 'test_parallel_executor_seresnext_base_cpu', + 'test_parallel_dygraph_sync_batch_norm', + 'test_origin_info', + 'test_multiclass_nms_op', + 'test_monitor', + 'test_mkldnn_conv_bias_fuse_pass', + 'test_mkldnn_conv_activation_fuse_pass', + 'test_matrix_nms_op', + 'test_ir_graph', + 'test_inference_api', + 'test_infer_shape', + 'test_infer_no_need_buffer_slots', + 'test_imperative_numpy_bridge', + 'test_imperative_decorator', + 'test_hooks', + 'test_gpu_package_without_gpu_device', + 'test_global_var_getter_setter', + 'test_get_set_flags', + 'test_fusion_repeated_fc_relu_op', + 'test_fused_emb_seq_pool_op', + 'test_fleet_base_4', + 'test_fc_lstm_fuse_pass', + 'test_executor_feed_non_tensor', + 'test_executor_check_feed', + 'test_executor_and_use_program_cache', + 'test_exception', + 'test_error_clip', + 'test_embedding_eltwise_layernorm_fuse_pass', + 'test_dyn_rnn', + 'test_dpsgd_op', + 'test_distributed_reader', + 'test_directory_migration', + 'test_dataset_wmt', + 'test_dataset_uci_housing', + 'test_dataset_cifar', + 'test_data_feeder', + 'test_cudnn_placement_pass', + 'test_conv3d_layer', + 'test_concat_bf16_mkldnn_op', + 'test_common_infer_shape_functions', + 'test_check_import_scipy', + 'test_calc_gradient', + 'test_bipartite_match_op', + 'test_attention_lstm_op', + 'test_array_read_write_op', + 'stringprintf_test', + 'stringpiece_test', + 'selected_rows_test', + 'scope_test', + 'reader_test', + 'prune_test', + 'op_tester', + 'eigen_test', + 'device_worker_test', + 'cudnn_helper_test', + 'cudnn_desc_test', + 'tuple_test', + 'timer_test', + 'test_zeros_op', + 'test_while_op', + 'test_utils', + 'test_static_analysis', + 'test_split_and_merge_lod_tensor_op', + 'test_spawn_and_init_parallel_env', + 'test_slice_var', + 'test_similarity_focus_op', + 'test_shuffle_batch_op', + 'test_shrink_rnn_memory', + 'test_set_bool_attr', + 'test_sequence_topk_avg_pooling', + 'test_selected_rows', + 'test_scope', + 'test_sampling_id_op', + 'test_runtime_and_compiletime_exception', + 'test_run_fluid_by_module_or_command_line', + 'test_retinanet_detection_output', + 'test_require_version', + 'test_repeated_fc_relu_fuse_pass', + 'test_registry', + 'test_recurrent_op', + 'test_recommender_system', + 'test_query_op', + 'test_quantization_mkldnn_pass', + 'test_quant2_int8_mkldnn_pass', + 'test_pybind_interface', + 'test_py_reader_error_msg', + 'test_prune', + 'test_protobuf', + 'test_progressbar', + 'test_program_to_string', + 'test_program_code', + 'test_program', + 'test_precision_recall_op', + 'test_positive_negative_pair_op', + 'test_parallel_executor_run_load_infer_program', + 'test_op_version', + 'test_op_support_gpu', + 'test_ones_op', + 'test_npair_loss_op', + 'test_nn_functional_embedding_static', + 'test_name_scope', + 'test_multiprocess_dataloader_iterable_dataset_split', + 'test_multi_gru_mkldnn_op', + 'test_mul_int8_mkldnn_op', + 'test_mkldnn_scale_matmul_fuse_pass', + 'test_mkldnn_op_inplace', + 'test_mkldnn_matmul_transpose_reshape_fuse_pass', + 'test_mkldnn_inplace_fuse_pass', + 'test_mkldnn_cpu_bfloat16_pass', + 'test_mine_hard_examples_op', + 'test_memory_usage', + 'test_matmul_mkldnn_op', + 'test_matmul_bf16_mkldnn_op', + 'test_math_op_patch', + 'test_match_matrix_tensor_op', + 'test_lookup_table_dequant_op', + 'test_logging_utils', + 'test_logger', + 'test_lod_tensor_array_ops', + 'test_lod_tensor_array', + 'test_lod_rank_table', + 'test_lod_array_length_op', + 'test_locality_aware_nms_op', + 'test_load_vars_shape_check', + 'test_load_op_xpu', + 'test_load_op', + 'test_linear_chain_crf_op', + 'test_layer_norm_mkldnn_op', + 'test_layer_norm_bf16_mkldnn_op', + 'test_lambv2_op', + 'test_ir_skip_layernorm_pass', + 'test_io_save_load', + 'test_input_spec', + 'test_inference_model_io', + 'test_imperative_base', + 'test_image_classification_layer', + 'test_image', + 'test_ifelse_basic', + 'test_hsigmoid_op', + 'test_generator', + 'test_generate_proposal_labels_op', + 'test_generate_mask_labels_op', + 'test_gast_with_compatibility', + 'test_fusion_squared_mat_sub_op', + 'test_fusion_seqconv_eltadd_relu_op', + 'test_fusion_lstm_op', + 'test_fusion_gru_op', + 'test_fusion_gru_int8_mkldnn_op', + 'test_fusion_gru_bf16_mkldnn_op', + 'test_fused_embedding_fc_lstm_op', + 'test_function_spec', + 'test_full_op', + 'test_framework_debug_str', + 'test_fp16_utils', + 'test_fleet_rolemaker_4', + 'test_flags_use_mkldnn', + 'test_filter_by_instag_op', + 'test_fetch_var', + 'test_fetch_handler', + 'test_feed_fetch_method', + 'test_fc_mkldnn_op', + 'test_fc_lstm_fuse_pass', + 'test_fc_gru_fuse_pass', + 'test_fc_bf16_mkldnn_op', + 'test_entry_attr', + 'test_entry_attr2', + 'test_elementwise_mul_bf16_mkldnn_op', + 'test_eager_deletion_recurrent_op', + 'test_eager_deletion_padding_rnn', + 'test_eager_deletion_mnist', + 'test_eager_deletion_dynamic_rnn_base', + 'test_eager_deletion_conditional_block', + 'test_dynrnn_static_input', + 'test_dynrnn_gradient_check', + 'test_dygraph_mode_of_unittest', + 'test_download', + 'test_distributions', + 'test_detection_map_op', + 'test_desc_clone', + 'test_depthwise_conv_mkldnn_pass', + 'test_deprecated_memory_optimize_interfaces', + 'test_default_scope_funcs', + 'test_default_dtype', + 'test_datasets', + 'test_dataset_voc', + 'test_dataset_movielens', + 'test_dataset_imikolov', + 'test_dataset_conll05', + 'test_data_generator', + 'test_data', + 'test_cyclic_cifar_dataset', + 'test_crypto', + 'test_create_op_doc_string', + 'test_create_global_var', + 'test_conv3d_transpose_layer', + 'test_conv2d_transpose_layer', + 'test_conv2d_mkldnn_op', + 'test_conv2d_layer', + 'test_conv2d_int8_mkldnn_op', + 'test_conv2d_bf16_mkldnn_op', + 'test_const_value', + 'test_conditional_block', + 'test_concat_int8_mkldnn_op', + 'test_compat', + 'test_collective_base', + 'test_collective_api_base', + 'test_chunk_eval_op', + 'test_broadcast_to_op', + 'test_broadcast_shape', + 'test_broadcast_error', + 'test_bpr_loss_op', + 'test_beam_search_op', + 'test_batch_sampler', + 'test_basic_rnn_name', + 'test_aligned_allocator', + 'scatter_test', + 'save_load_combine_op_test', + 'program_desc_test', + 'lodtensor_printer_test', + 'lod_tensor_test', + 'gather_test', + 'gather_op_test', + 'fused_broadcast_op_test', + 'exception_holder_test', + 'decorator_test', + 'ddim_test', + 'data_layout_transform_test', + 'cpu_vec_test', + 'cow_ptr_tests', + 'conditional_block_op_test', + 'bfloat16_test', + 'assign_op_test', + 'unroll_array_ops_test', + 'test_seqpool_cvm_concat_fuse_pass', + 'test_seqpool_concat_fuse_pass', + 'test_reshape_bf16_op', + 'test_repeated_fc_relu_fuse_pass', + 'test_py_reader_return_list', + 'test_py_reader_lod_level_share', + 'test_protobuf_descs', + 'test_paddle_inference_api', + 'test_operator_desc', + 'test_operator', + 'test_mkldnn_matmul_op_output_fuse_pass', + 'test_mkldnn_inplace_pass', + 'test_mkldnn_conv_concat_relu_mkldnn_fuse_pass', + 'test_layer', + 'test_is_test_pass', + 'test_graph_pattern_detector', + 'test_fusion_seqpool_cvm_concat_op', + 'test_fusion_seqpool_concat_op', + 'test_fusion_seqexpand_concat_fc_op', + 'test_fusion_gru_mkldnn_op', + 'test_fleet_util', + 'test_fleet_runtime', + 'test_fleet_rolemaker_init', + 'test_flags_mkldnn_ops_on_off', + 'test_dataset_download', + 'test_dataloader_unkeep_order', + 'test_dataloader_keep_order', + 'test_dataloader_dataset', + 'test_crf_decoding_op', + 'test_create_parameter', + 'test_context_manager', + 'test_analyzer', + 'tensor_test', + 'split_test', + 'save_load_op_test', + 'place_test', + 'op_version_registry_test', + 'op_proto_maker_test', + 'op_kernel_type_test', + 'mask_util_test', + 'inlined_vector_test', + 'infer_io_utils_tester', + 'errors_test', + 'enforce_test', + 'dropout_op_test', + 'data_type_test', + 'cpu_info_test', + 'cpu_helper_test', + 'beam_search_decode_op_test', + 'auto_growth_best_fit_allocator_test', + 'test_skip_layernorm_fuse_pass', + 'test_multihead_matmul_fuse_pass', + 'test_fc_elementwise_layernorm_fuse_pass', + 'version_test', + 'variable_test', + 'test_scale_matmul_fuse_pass', + 'test_reshape_transpose_matmul_mkldnn_fuse_pass', + 'test_multi_gru_seq_fuse_pass', + 'test_multi_gru_fuse_pass', + 'test_mkldnn_placement_pass', + 'test_mkldnn_op_nhwc', + 'test_matmul_transpose_reshape_fuse_pass', + 'test_fs', + 'test_fleet', + 'test_cpu_quantize_squash_pass', + 'test_cpu_quantize_placement_pass', + 'test_cpu_quantize_pass', + 'test_cpu_bfloat16_placement_pass', + 'test_cpu_bfloat16_pass', + 'test_conv_elementwise_add_mkldnn_fuse_pass', + 'test_conv_concat_relu_mkldnn_fuse_pass', + 'test_conv_bias_mkldnn_fuse_pass', + 'test_conv_batch_norm_mkldnn_fuse_pass', + 'test_conv_activation_mkldnn_fuse_pass', + 'test_benchmark', + 'test_batch_norm_act_fuse_pass', + 'selected_rows_functor_test', + 'save_load_util_test', + 'pass_test', + 'operator_test', + 'operator_exception_test', + 'op_debug_string_test', + 'op_compatible_info_test', + 'op_call_stack_test', + 'node_test', + 'no_need_buffer_vars_inference_test', + 'nccl_context_test', + 'math_function_test', + 'init_test', + 'graph_to_program_pass_test', + 'graph_test', + 'graph_helper_test', + 'float16_test', + 'dist_multi_trainer_test', + 'cipher_utils_test', + 'broadcast_op_test', + 'aes_cipher_test', +] + +# It run 4 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, +# just remove it from this list. +TETRAD_PARALLEL_JOB = [ + 'system_allocator_test', + 'buffered_allocator_test', + 'test_tensor_to_numpy', + 'test_imperative_framework', + 'test_naive_best_fit_gpu_memory_limit', + 'test_auto_growth_gpu_memory_limit', + 'test_imperative_using_non_zero_gpu', + 'cuda_helper_test', + 'retry_allocator_test', + 'allocator_facade_frac_flags_test', +] + + +def main(): + eight_parallel_job = '^job$' + tetrad_parallel_job = '^job$' + non_parallel_job_1 = '^job$' + non_parallel_job_2 = '^job$' + + test_cases = sys.argv[1] + test_cases = test_cases.split("\n") + for unittest in test_cases: + if unittest in CPU_PARALLEL_JOB: + eight_parallel_job = eight_parallel_job + '|^' + unittest + '$' + continue + if unittest in TETRAD_PARALLEL_JOB: + tetrad_parallel_job = tetrad_parallel_job + '|^' + unittest + '$' + continue + + if len(non_parallel_job_1) < 10000: + non_parallel_job_1 = non_parallel_job_1 + '|^' + unittest + '$' + else: + non_parallel_job_2 = non_parallel_job_2 + '|^' + unittest + '$' + + non_parallel_job = ",".join([non_parallel_job_1, non_parallel_job_2]) + print("{};{};{}".format(eight_parallel_job, tetrad_parallel_job, + non_parallel_job)) + + +if __name__ == '__main__': + main() diff --git a/tools/windows/run_unittests.sh b/tools/windows/run_unittests.sh index 0a21ffc5a425a51d4b75c89bdf316cd7a3f08b8b..a4340d9ecdaea7ff097e22b2ecd9126c19cac832 100644 --- a/tools/windows/run_unittests.sh +++ b/tools/windows/run_unittests.sh @@ -204,4 +204,64 @@ long_time_test="^best_fit_allocator_test$|\ export FLAGS_call_stack_level=2 export FLAGS_fraction_of_gpu_memory_to_use=0.92 export CUDA_VISIBLE_DEVICES=0 -ctest -E "$disable_ut_quickly|$diable_wingpu_test|$long_time_test" -LE "${nightly_label}" --output-on-failure -C Release --repeat until-pass:4 after-timeout:4 + +UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d') +num=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d' | wc -l) +echo "Windows 1 card TestCases count is $num" +output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}") +eight_parallel_job=$(echo $output | cut -d ";" -f 1) +tetrad_parallel_jog=$(echo $output | cut -d ";" -f 2) +non_parallel_job=$(echo $output | cut -d ";" -f 3) + +non_parallel_job_1=$(echo $non_parallel_job | cut -d "," -f 1) +non_parallel_job_2=$(echo $non_parallel_job | cut -d "," -f 2) + +failed_test_lists='' +tmp_dir=`mktemp -d` +function collect_failed_tests() { + for file in `ls $tmp_dir`; do + grep -q 'The following tests FAILED:' $tmp_dir/$file + exit_code=$? + if [ $exit_code -ne 0 ]; then + failuretest='' + else + failuretest=`grep -A 10000 'The following tests FAILED:' $tmp_dir/$file | sed 's/The following tests FAILED://g'|sed '/^$/d'` + failed_test_lists="${failed_test_lists} + ${failuretest}" + fi + done +} + +function run_unittest() { + test_case=$1 + parallel_job=$2 + if [ "$2" == "" ]; then + parallel_job=1 + else + parallel_job=$2 + fi + echo "************************************************************************" + echo "********These unittests run $parallel_job job each time with 1 GPU**********" + echo "************************************************************************" + export CUDA_VISIBLE_DEVICES=0 + tmpfile=$tmp_dir/$RANDOM + (ctest -R "$test_case" -E "$disable_ut_quickly|$diable_wingpu_test|$long_time_test" -LE "${nightly_label}" --output-on-failure -C Release -j $parallel_job --repeat until-pass:4 after-timeout:4 | tee $tmpfile ) & + wait; +} + +set +e +run_unittest $eight_parallel_job 8 +run_unittest $tetrad_parallel_jog 4 +run_unittest $non_parallel_job_1 +run_unittest $non_parallel_job_2 +collect_failed_tests +set -e +rm -f $tmp_dir/* +if [[ "$failed_test_lists" != "" ]]; then + echo "========================================" + echo "Summary Failed Tests... " + echo "========================================" + echo "The following tests FAILED: " + echo "${failed_test_lists}" + exit 8 +fi