From ed6624ab78d70aa51ca25d6759e6d2ca4e9da9cb Mon Sep 17 00:00:00 2001 From: zhangchunle Date: Mon, 16 Aug 2021 21:18:43 +0800 Subject: [PATCH] concurrent (#34908) --- paddle/scripts/paddle_build.sh | 146 ++++--- tools/parallel_UT_rule.py | 692 ++++++++++++++++++++++++++++++++- 2 files changed, 788 insertions(+), 50 deletions(-) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 1d3504556f..abaae9a361 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -1059,6 +1059,7 @@ function get_quickly_disable_ut() { function card_test() { set -m + CTEST_PARALLEL_LEVEL=2 case_count $1 $2 ut_startTime_s=`date +%s` @@ -1127,10 +1128,8 @@ function card_test() { ut_endTime_s=`date +%s` if (( $2 == -1 )); then echo "exclusive TestCases Total Time: $[ $ut_endTime_s - $ut_startTime_s ]s" - echo "ipipe_log_param_Exclusive_TestCases_Total_Time: $[ $ut_endTime_s - $ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt else echo "$2 card TestCases Total Time: $[ $ut_endTime_s - $ut_startTime_s ]s" - echo "ipipe_log_param_${2}_Cards_TestCases_Total_Time: $[ $ut_endTime_s - $ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt fi set +m } @@ -1181,16 +1180,19 @@ set +x EXIT_CODE=0; test_cases=$(ctest -N -V) # get all test cases # Note(zhouwei): Parallel runs are relative to 'CTEST_PARALLEL_LEVEL', e.g: '4 job each time' means 4*CTEST_PARALLEL_LEVEL - single_card_tests_high_parallel='^job$' # cases list which would run the most job each time with single GPU - single_card_tests_two_parallel='^job$' # cases list which would run 2 job each time with single GPU - single_card_tests_non_parallel='^job$' # cases list which would run 1 job each time with single GPU - single_card_tests='^job$' # all cases list which would take single GPU + single_card_tests_high_parallel='^job$' # cases list which would run 24 job each time with single GPU + single_card_tests_secondary_high_parallel='^job$' # cases list which would run 15 job each time with single GPU + single_card_tests_third_high_parallel='^job$' # cases list which would run 12 job each time with single GPU + single_card_tests_medium_parallel='^job$' # cases list which would run 7 job each time with single GPU + single_card_tests_non_parallel='^job$' # cases list which would run 2 job each time with single GPU + single_card_tests='^job$' # all cases list which would take single GPU - multiple_card_tests_two_parallel='^job$' # cases list which would run 2 job each time with multiple GPUs, most cases would be two GPUs - multiple_card_tests_non_parallel='^job$' # cases list which would run 1 job each time with multiple GPUs, most cases would be two GPUs + multiple_card_tests_medium_parallel='^job$' # cases list which would run 4 job each time with multiple GPUs, most cases would be two GPUs + multiple_card_tests_non_parallel='^job$' # cases list which would run 2 job each time with multiple GPUs, most cases would be two GPUs - exclusive_tests_two_parallel='^job$' # cases list which would run 2 job exclusively(with all GPUs) - exclusive_tests_non_parallel='^job$' # cases list which would run 1 job exclusively(with all GPUs) + exclusive_tests_high_parallel='^job$' # cases list which would run 5 job exclusively(with all GPUs) + exclusive_tests_medium_parallel='^job$' # cases list which would run 3 job exclusively(with all GPUs) + exclusive_tests_non_parallel='^job$' # cases list which would run 2 job exclusively(with all GPUs) is_exclusive='' # indicate whether the case is exclusive type is_multicard='' # indicate whether the case is multiple GPUs type @@ -1200,9 +1202,11 @@ set +x UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d') output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}") cpu_parallel_job=$(echo $output | cut -d ";" -f 1) - tetrad_parallel_job=$(echo $output | cut -d ";" -f 2) - two_parallel_job=$(echo $output | cut -d ";" -f 3) - non_parallel_job=$(echo $output | cut -d ";" -f 4) + secondary_cpu_parallel_job=$(echo $output | cut -d ";" -f 2) + third_cpu_parallel_job=$(echo $output | cut -d ";" -f 3) + tetrad_parallel_job=$(echo $output | cut -d ";" -f 4) + two_parallel_job=$(echo $output | cut -d ";" -f 5) + non_parallel_job=$(echo $output | cut -d ";" -f 6) while read -r line; do if [[ "$line" == "" ]]; then continue @@ -1244,22 +1248,28 @@ set +x fi if [[ "$is_exclusive" != "" ]]; then - if [[ $(echo $cpu_parallel_job$tetrad_parallel_job$two_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then - exclusive_tests_two_parallel="$exclusive_tests_two_parallel|^$testcase$" + if [[ $(echo $cpu_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then + exclusive_tests_high_parallel="$exclusive_tests_high_parallel|^$testcase$" + elif [[ $(echo $tetrad_parallel_job$two_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then + exclusive_tests_medium_parallel="$exclusive_tests_medium_parallel|^$testcase$" else exclusive_tests_non_parallel="$exclusive_tests_non_parallel|^$testcase$" fi elif [[ "$is_multicard" != "" ]]; then - if [[ $(echo $cpu_parallel_job$tetrad_parallel_job$two_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then - multiple_card_tests_two_parallel="$multiple_card_tests_two_parallel|^$testcase$" + if [[ $(echo $cpu_parallel_job$tetrad_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then + multiple_card_tests_medium_parallel="$multiple_card_tests_medium_parallel|^$testcase$" else multiple_card_tests_non_parallel="$multiple_card_tests_non_parallel|^$testcase$" fi else if [[ $(echo $cpu_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then single_card_tests_high_parallel="$single_card_tests_high_parallel|^$testcase$" + elif [[ $(echo $secondary_cpu_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then + single_card_tests_secondary_high_parallel="$single_card_tests_secondary_high_parallel|^$testcase$" + elif [[ $(echo $third_cpu_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then + single_card_tests_third_high_parallel="$single_card_tests_third_high_parallel|^$testcase$" elif [[ $(echo $tetrad_parallel_job$two_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then - single_card_tests_two_parallel="$single_card_tests_two_parallel|^$testcase$" + single_card_tests_medium_parallel="$single_card_tests_medium_parallel|^$testcase$" else single_card_tests_non_parallel="$single_card_tests_non_parallel|^$testcase$" fi @@ -1271,24 +1281,42 @@ set +x matchstr='' testcase='' done <<< "$test_cases"; - - card_test "$single_card_tests_high_parallel" 1 6 # run cases the most each time with single GPU - card_test "$single_card_tests_two_parallel" 1 2 # run cases 2 job each time with single GPU - card_test "$single_card_tests_non_parallel" 1 # run cases 1 job each time with single GPU - card_test "$multiple_card_tests_two_parallel" 2 2 # run cases 2 job each time with two GPUs - card_test "$multiple_card_tests_non_parallel" 2 # run cases 1 job each time with two GPUs - - card_test "$exclusive_tests_two_parallel" -1 2 # run cases exclusively, in this cases would be run with 2/4/8 GPUs - card_test "$exclusive_tests_non_parallel" -1 # run cases exclusively, in this cases would be run with 2/4/8 GPUs + ut_actual_total_startTime_s=`date +%s` + + single_ut_startTime_s=`date +%s` + card_test "$single_card_tests_high_parallel" 1 24 # run cases 24 job each time with single GPU + card_test "$single_card_tests_secondary_high_parallel" 1 15 # run cases 15 job each time with single GPU + card_test "$single_card_tests_third_high_parallel" 1 12 # run cases 12 job each time with single GPU + card_test "$single_card_tests_medium_parallel" 1 7 # run cases 7 job each time with single GPU + card_test "$single_card_tests_non_parallel" 1 2 # run cases 2 job each time with single GPU + single_ut_endTime_s=`date +%s` + + multi_ut_startTime_s=`date +%s` + card_test "$multiple_card_tests_medium_parallel" 2 4 # run cases 2 job each time with two GPUs + card_test "$multiple_card_tests_non_parallel" 2 2 # run cases 1 job each time with two GPUs + multi_ut_endTime_s=`date +%s` + + exclu_ut_startTime_s=`date +%s` + card_test "$exclusive_tests_high_parallel" -1 5 # run cases exclusively, in this cases would be run with 2/4/8 GPUs + card_test "$exclusive_tests_medium_parallel" -1 3 # run cases exclusively, in this cases would be run with 2/4/8 GPUs + card_test "$exclusive_tests_non_parallel" -1 2 # run cases exclusively, in this cases would be run with 2/4/8 GPUs + exclu_ut_endTime_s=`date +%s` + + echo "ipipe_log_param_1_TestCases_Total_Time: $[ $single_ut_endTime_s - $single_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt + echo "ipipe_log_param_2_TestCases_Total_Time: $[ $multi_ut_endTime_s - $multi_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt + echo "ipipe_log_param_Exclusive_TestCases_Total_Time: $[ $exclu_ut_endTime_s - $exclu_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt + collect_failed_tests rm -f $tmp_dir/* exec_times=0 retry_unittests_record='' - retry_time=3 - exec_time_array=('first' 'second' 'third') + retry_time=4 + exec_time_array=('first' 'second' 'third' 'fourth') + parallel_failed_tests_exec_retry_threshold=80 exec_retry_threshold=10 is_retry_execuate=0 + rerun_ut_startTime_s=`date +%s` if [ -n "$failed_test_lists" ];then if [ ${TIMEOUT_DEBUG_HELP:-OFF} == "ON" ];then bash $PADDLE_ROOT/tools/timeout_debug_help.sh "$failed_test_lists" # cat logs for tiemout uts which killed by ctest @@ -1297,14 +1325,30 @@ set +x need_retry_ut_arr=(${need_retry_ut_str}) need_retry_ut_count=${#need_retry_ut_arr[@]} read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) - if [ $need_retry_ut_count -lt $exec_retry_threshold ];then - while ( [ $exec_times -lt $retry_time ] ) - do + while ( [ $exec_times -lt $retry_time ] ) + do + if [[ "${exec_times}" == "0" ]] ;then + if [ $need_retry_ut_count -lt $parallel_failed_tests_exec_retry_threshold ];then + is_retry_execuate=0 + else + is_retry_execuate=1 + fi + elif [[ "${exec_times}" == "1" ]] ;then + read need_retry_ut_str <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) + need_retry_ut_arr=(${need_retry_ut_str}) + need_retry_ut_count=${#need_retry_ut_arr[@]} + if [ $need_retry_ut_count -lt $exec_retry_threshold ];then + is_retry_execuate=0 + else + is_retry_execuate=1 + fi + fi + if [[ "$is_retry_execuate" == "0" ]];then set +e retry_unittests_record="$retry_unittests_record$failed_test_lists" failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'` set -e - if [[ "${exec_times}" == "1" ]];then + if [[ "${exec_times}" == "1" ]] || [[ "${exec_times}" == "3" ]];then if [[ "${failed_test_lists}" == "" ]];then break else @@ -1315,11 +1359,9 @@ set +x echo "This is the ${exec_time_array[$exec_times]} time to re-run" echo "=========================================" echo "The following unittest will be re-run:" - echo "${retry_unittests}" - + echo "${retry_unittests}" for line in ${retry_unittests[@]} ; do - read tmp_one_tmp <<< "$( echo $single_card_tests | grep -oEi $line )" read tmp_mul_tmp <<< "$( echo $multiple_card_tests | grep -oEi $line )" read exclusive_tmp <<< "$( echo $exclusive_tests | grep -oEi $line )" @@ -1347,7 +1389,7 @@ set +x done if [[ "$one_card_retry" != "" ]]; then - card_test "$one_card_retry" 1 + card_test "$one_card_retry" 1 4 fi if [[ "$multiple_card_retry" != "" ]]; then @@ -1357,21 +1399,22 @@ set +x if [[ "$exclusive_retry" != "" ]]; then card_test "$exclusive_retry" -1 fi - exec_times=$[$exec_times+1] failed_test_lists='' collect_failed_tests rm -f $tmp_dir/* one_card_retry='' multiple_card_retry='' - exclusive_retry='' - done - else - # There are more than 10 failed unit tests, so no unit test retry - is_retry_execuate=1 - fi + exclusive_retry='' + fi + done fi + rerun_ut_endTime_s=`date +%s` + + echo "ipipe_log_param_Rerun_TestCases_Total_Time: $[ $rerun_ut_endTime_s - $rerun_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt + ut_actual_total_endTime_s=`date +%s` + echo "ipipe_log_param_actual_TestCases_Total_Time: $[ $ut_actual_total_endTime_s - $ut_actual_total_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt if [[ "$EXIT_CODE" != "0" ]]; then show_ut_retry_result fi @@ -1380,7 +1423,20 @@ set -ex } function show_ut_retry_result() { - if [[ "$is_retry_execuate" != "0" ]];then + if [ "$SYSTEM" == "Darwin" ]; then + exec_retry_threshold_count=10 + else + exec_retry_threshold_count=80 + fi + if [[ "$is_retry_execuate" != "0" ]] && [[ "${exec_times}" == "0" ]] ;then + failed_test_lists_ult=`echo "${failed_test_lists}" | grep -Po '[^ ].*$'` + echo "=========================================" + echo "There are more than ${exec_retry_threshold_count} failed unit tests in parallel test, so no unit test retry!!!" + echo "=========================================" + echo "The following tests FAILED: " + echo "${failed_test_lists_ult}" + exit 8; + elif [[ "$is_retry_execuate" != "0" ]] && [[ "${exec_times}" == "1" ]];then failed_test_lists_ult=`echo "${failed_test_lists}" | grep -Po '[^ ].*$'` echo "=========================================" echo "There are more than 10 failed unit tests, so no unit test retry!!!" diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index e6a628ae2f..ccf849a297 100644 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -14,6 +14,660 @@ import sys import os +import platform + +# mem=0 : It run 24 job each time in Single cases; 4 job each time in Multi cases; 5 job each time in exclusive cases +HIGH_PARALLEL_JOB_NEW = [ + 'mask_util_test', 'test_communicator_ps_gpu', 'preprocess_local_imagenet', + 'test_nearest_interp_v2_mkldnn_op', 'op_call_stack_test', + 'test_fleet_amp_meta_optimizer', 'test_mkldnn_scale_matmul_fuse_pass', + 'bfloat16_gpu_test', 'test_fc_gru_fuse_pass_cc', 'device_worker_test', + 'test_custom_conj', 'save_load_util_test', 'infer_io_utils_tester', + 'test_transpose_bf16_mkldnn_op', 'test_container', 'cpu_helper_test', + 'test_fake_init_op', 'test_concat_int8_mkldnn_op', + 'test_lookup_table_dequant_op', 'test_broadcast_shape', + 'test_program_to_string', 'test_generate_mask_labels_op', + 'test_eager_deletion_dynamic_rnn_base', 'test_global_var_getter_setter', + 'test_ifelse_basic', 'test_get_set_flags', 'dim_test', + 'test_py_reader_return_list', 'test_fleet_meta_optimizer_base', + 'test_py_reader_error_msg', 'scope_test', 'buffered_allocator_test', + 'test_scaled_dot_product_attention', 'prune_test', 'test_chunk_eval_op', + 'test_static_analysis', 'test_fleet_lars_meta_optimizer', + 'heter_server_test', 'test_while_op', + 'test_runtime_and_compiletime_exception', 'test_precision_recall_op', + 'test_get_inputs_outputs_in_block', 'test_lite_engine_op', + 'test_repeated_fc_relu_fuse_pass_cc', + 'test_mkldnn_matmul_op_output_fuse_pass', 'cudnn_helper_test', + 'test_check_abi', 'data_type_test', 'test_recurrent_op', 'test_asp_utils', + 'test_paddle_inference_api', 'test_reference_count_pass_last_lived_ops', + 'test_op_support_gpu', 'test_conditional_block', + 'test_fleet_rolemaker_init', 'test_pybind_interface', 'test_io_save_load', + 'test_split_and_merge_lod_tensor_op', 'test_fusion_lstm_int8_mkldnn_op', + 'test_benchmark', 'test_protobuf', 'test_tdm_sampler_op', + 'test_teacher_student_sigmoid_loss_op', 'test_transpose_int8_mkldnn_op', + 'test_transpose_mkldnn_op', 'test_fleet_rolemaker_4', 'to_string_test', + 'test_c_comm_init_all_op', 'test_bilinear_interp_mkldnn_op', + 'test_split_bf16_mkldnn_op', 'test_cpu_quantize_squash_pass', + 'test_batch_norm_act_fuse_pass', 'test_mkldnn_op_inplace', + 'test_seqpool_concat_fuse_pass', 'test_analyzer_save_model', + 'test_exception', 'test_fc_lstm_fuse_pass', 'test_similarity_focus_op', + 'test_conv_batch_norm_mkldnn_fuse_pass', 'test_sequence_last_step', + 'test_mkldnn_cpu_bfloat16_pass', 'op_debug_string_test', + 'test_quant2_int8_mkldnn_pass', 'test_layer', 'test_sampling_id_op', + 'test_nce', 'graph_helper_test', + 'test_static_shape_inferrence_for_shape_tensor', + 'test_layer_norm_mkldnn_op', 'test_fleet_launch_async', + 'test_multi_gru_fuse_pass', 'test_hash_op', 'test_rpn_target_assign_op', + 'test_concat_bf16_mkldnn_op', 'test_fc_lstm_fuse_pass_cc', 'test_version', + 'gather_test', 'test_mkldnn_inplace_fuse_pass', 'test_reshape_bf16_op', + 'test_compat', 'test_data_feeder', 'cpu_vec_test', + 'test_distributed_strategy', 'test_hsigmoid_op', 'test_hooks', + 'test_fleet_base_2', 'op_kernel_type_test', + 'test_layer_norm_bf16_mkldnn_op', 'test_fleetrun', 'cpu_info_test', + 'brpc_utils_test', 'test_fusion_seqexpand_concat_fc_op', 'test_dataset_voc', + 'test_analyzer_capi_exp_int', 'test_post_training_quantization_resnet50', + 'cuda_helper_test', 'test_conv_concat_relu_mkldnn_fuse_pass', + 'test_bf16_utils', 'test_sum_bf16_mkldnn_op', + 'test_unsqueeze2_eltwise_fuse_pass', 'dense_table_test', + 'test_collective_optimizer', 'test_origin_info', 'test_dgc_optimizer', + 'test_avoid_twice_initialization', 'test_reduce_bf16_mkldnn_op', + 'test_mkldnn_conv_bias_fuse_pass', 'cow_ptr_tests', 'eigen_test', + 'reader_blocking_queue_test', 'test_fusion_gru_op', 'operator_test', + 'test_fusion_gru_int8_mkldnn_op', 'test_cpu_bfloat16_pass', + 'test_multiprocess_dataloader_iterable_dataset_split', 'test_scope', + 'test_analyzer_bfloat16_mobilenetv2', 'test_fleet_rolemaker_2', + 'float16_test', 'test_dpsgd_op', + 'test_conv_elementwise_add_mkldnn_fuse_pass', 'test_crypto', + 'test_sgd_op_bf16', 'test_analyzer_capi_exp_ner', + 'lite_subgraph_pass_tester', 'test_tf32_cudnn', 'threadpool_test', + 'test_cpu_quantize_pass', 'test_analyzer_capi_exp_pd_tensor', 'tuple_test', + 'test_analyzer_lac', 'test_prune', 'test_bilinear_interp_v2_mkldnn_op', + 'test_lod_tensor_array', 'test_logging_utils', 'test_fleet_nocvm_1', + 'stringprintf_test', 'test_nearest_interp_mkldnn_op', + 'test_matmul_mkldnn_op', 'test_debugger', 'test_custom_attrs_jit', + 'test_lrn_mkldnn_op', 'test_set_bool_attr', 'version_test', + 'test_broadcast_to_op', 'test_squared_mat_sub_fuse_pass', + 'test_fleet_ascend_utils', 'test_layer_norm_fuse_pass', + 'test_fused_emb_seq_pool_op', 'test_imperative_data_loader_exit_func', + 'test_feed_fetch_method', 'test_protobuf_descs', 'test_fleet_unitaccessor', + 'test_sequence_scatter_op', 'test_skip_layernorm_fuse_pass', + 'test_fs_interface', 'test_gast_with_compatibility', + 'test_repeated_fc_relu_fuse_pass', 'timer_test', 'var_type_traits_test', + 'test_py_reader_sample_generator', 'test_conv2d_transpose_mkldnn_op', + 'test_fleet_runtime', 'test_rnn_cudnn_params_packing', + 'test_mkldnn_placement_pass', 'test_fc_elementwise_layernorm_fuse_pass', + 'program_desc_test', 'test_simplify_with_basic_ops_pass', + 'test_dygraph_mode_of_unittest', 'gather_op_test', 'test_trainer_desc', + 'test_matmul_bf16_mkldnn_op', 'test_analyzer_seq_conv1', + 'test_fused_embedding_fc_lstm_op', 'test_conv2d_transpose_bf16_mkldnn_op', + 'check_reduce_rank_test', 'test_progressbar', 'test_seed_op', + 'test_shrink_rnn_memory', 'test_fc_bf16_mkldnn_op', + 'test_sequence_first_step', 'test_layer_norm_fuse_pass_cc', + 'test_fusion_lstm_mkldnn_op', 'test_elementwise_add_bf16_mkldnn_op', + 'test_static_save_load_bf16', 'test_elementwise_mul_bf16_mkldnn_op', + 'test_distributions', 'operator_exception_test', 'dropout_op_test', + 'test_gpu_package_without_gpu_device', 'test_detection_map_op', + 'test_zeros_op', 'test_launch_coverage', + 'test_mkldnn_conv_activation_fuse_pass', 'test_inference_model_io', + 'heter_listen_and_server_test', 'test_fusion_repeated_fc_relu_op', + 'cudnn_desc_test', 'test_beam_search_op', 'test_var_conv_2d', + 'test_listen_and_serv_op', 'test_dequantize_mkldnn_op', + 'test_analyzer_capi_exp_pd_threads', 'test_selected_rows', + 'test_fleet_sharding_meta_optimizer', 'test_inference_api', + 'test_mkldnn_inplace_pass', 'test_data_generator', + 'test_deprecated_memory_optimize_interfaces', 'test_ir_skip_layernorm_pass', + 'broadcast_op_test', 'test_multihead_matmul_fuse_pass', + 'test_lookup_table_bf16_op', 'test_positive_negative_pair_op', 'init_test', + 'test_tensorrt', 'test_check_error', 'test_program', 'mmap_allocator_test', + 'test_reshape_transpose_matmul_mkldnn_fuse_pass', 'test_communicator_async', + 'test_downpoursgd', 'variable_test', 'test_quantization_mkldnn_pass', + 'test_quantize_mkldnn_op', 'test_create_op_doc_string', + 'test_analyzer_lexical_gru_bfloat16', 'test_imperative_data_loader_process', + 'assign_op_test', 'test_analyzer_capi_exp_xpu', 'test_conv_bn_fuse_pass_cc', + 'test_recommender_system', 'test_ones_op', 'test_fc_mkldnn_op', + 'test_load_op_xpu', 'test_pool2d_int8_mkldnn_op', 'test_mul_int8_mkldnn_op', + 'test_scale_matmul_fuse_pass', 'test_fleet_graph_executor', 'decorator_test', + 'test_collective_base', 'test_lod_rank_table', 'test_multi_gru_mkldnn_op', + 'test_eager_deletion_conditional_block', 'op_proto_maker_test', + 'test_mkldnn_op_nhwc', 'test_fc_act_mkldnn_fuse_pass', 'test_fleet_base_3', + 'test_basic_rnn_name', 'test_query_op', 'test_fleet_base_4', + 'save_load_op_test', 'test_batch_sampler', + 'test_image_classification_layer', 'test_fusion_gru_mkldnn_op', + 'graph_test', 'test_ir_graph', 'test_hapi_hub_model', + 'test_requantize_mkldnn_op', 'test_depthwise_conv_mkldnn_pass', + 'test_fleet_metric', 'test_fc_fuse_pass_cc', 'test_fleet_private_function', + 'test_fleet', 'test_executor_check_feed', 'test_py_reader_lod_level_share', + 'nccl_context_test', 'inlined_vector_test', + 'test_generate_proposal_labels_op', 'test_analyzer_capi_exp_pd_config', + 'test_locality_aware_nms_op', 'test_imperative_decorator', + 'test_npair_loss_op', 'test_ps_dispatcher', 'test_analyzer_rnn2', + 'test_multi_gru_seq_fuse_pass', 'test_filter_by_instag_op', 'test_switch', + 'test_matmul_transpose_reshape_fuse_pass', 'test_mkldnn_caching', + 'test_fetch_var', 'op_compatible_info_test', 'complex_test', + 'test_fleet_static_mp_layers', 'test_aligned_allocator', + 'test_analyzer_transformer_fuse', 'test_sequence_topk_avg_pooling', + 'test_analyzer_lexical_gru', 'test_broadcast_error', 'test_context_manager', + 'test_registry', 'brpc_service_sparse_sgd_test', 'test_operator', + 'test_mkldnn_conv_concat_relu_mkldnn_fuse_pass', 'test_collective_api_base', + 'test_entry_attr', 'test_get_places_op', 'test_softmax_mkldnn_op', + 'test_dynrnn_static_input', 'auto_growth_best_fit_allocator_test', + 'test_batch_norm_mkldnn_op', 'test_bpr_loss_op', + 'no_need_buffer_vars_inference_test', 'test_fleet_cc', 'test_download', + 'test_fleet_recompute_meta_optimizer', 'test_seqpool_cvm_concat_fuse_pass', + 'test_common_infer_shape_functions', 'test_fusion_seqpool_concat_op', + 'test_op_compat_sensible_pass', 'test_fs', 'test_fc_rnn_mkldnn_fuse_pass', + 'split_test', 'test_fusion_group_pass', 'test_fusion_lstm_bf16_mkldnn_op', + 'test_executor_feed_non_tensor', 'test_var_info', 'test_reducescatter', + 'test_fleet_ps', 'test_check_import_scipy', 'test_load_vars_shape_check', + 'test_nn_functional_embedding_static', 'test_fleet_rolemaker_new', + 'test_imperative_base', 'dist_multi_trainer_test', + 'test_mine_hard_examples_op', 'test_post_training_quantization_lstm_model', + 'aes_cipher_test', 'test_analyzer_zerocopytensor_tensor', 'rw_lock_test', + 'exception_holder_test', 'enforce_test', 'test_rnn_memory_helper_op', + 'ddim_test', 'test_eager_deletion_padding_rnn', 'test_is_test_pass', + 'test_fusion_seqconv_eltadd_relu_op', 'test_fleet_localsgd_meta_optimizer', + 'node_test', 'test_analyzer_text_classification', + 'test_seq_concat_fc_fuse_pass', 'test_imperative_numpy_bridge', + 'test_adaptive_pool2d_convert_global_pass', 'test_lookup_table_v2_bf16_op', + 'test_operator_desc', 'test_elementwise_mul_mkldnn_op', + 'test_fetch_handler', 'test_cpu_bfloat16_placement_pass', + 'test_match_matrix_tensor_op', 'test_fleet_run_random_port', + 'test_mkldnn_matmul_transpose_reshape_fuse_pass', + 'test_fleet_lamb_meta_optimizer', 'test_op_version', + 'fused_broadcast_op_test', 'stringpiece_test', 'test_tdm_child_op', + 'test_imperative_group', 'test_analyzer_capi_exp', + 'test_post_training_quantization_mobilenetv1', 'test_load_op', + 'test_executor_and_use_program_cache', 'op_registry_test', + 'test_create_global_var', 'test_dispatch_jit', 'table_test', 'test_full_op', + 'test_recv_save_op', 'test_fusion_lstm_op', + 'test_eager_deletion_recurrent_op', 'brpc_service_dense_sgd_test', + 'op_tester', 'test_eager_deletion_mnist', 'test_infer_shape', + 'test_fleet_rolemaker', 'test_entry_attr2', 'test_monitor', + 'test_require_version', 'test_function_spec', 'test_image', + 'lod_tensor_test', 'place_test', 'test_fleet_launch_cloud', + 'test_conv2d_bf16_mkldnn_op', + 'test_parallel_executor_run_load_infer_program', 'scatter_test', + 'graph_to_program_pass_test', 'test_lod_tensor_array_ops', + 'test_embedding_eltwise_layernorm_fuse_pass', 'complex_gpu_test', + 'save_load_combine_op_test', 'test_logger', 'test_analyzer', 'test_utils', + 'barrier_table_test', 'test_memory_usage', 'test_sysconfig', 'reader_test', + 'test_conv_bias_mkldnn_fuse_pass', 'math_function_test', + 'beam_search_decode_op_test', 'save_quant2_model_resnet50', 'bfloat16_test', + 'test_scale_bf16_mkldnn_op', 'test_fp16_utils', + 'test_cpu_quantize_placement_pass', 'test_slice_var', 'test_analyzer_ocr', + 'test_flags_use_mkldnn', 'pass_test', 'test_trainable', + 'test_sync_batch_norm_pass', 'lodtensor_printer_test', 'test_calc_gradient', + 'test_create_parameter', 'test_infer_no_need_buffer_slots', + 'test_run_fluid_by_module_or_command_line', 'test_boxps', + 'test_initializer', 'test_fusion_squared_mat_sub_op', 'test_desc_clone', + 'test_analyzer_mobilenet_depthwise_conv', 'test_analyzer_pyramid_dnn', + 'test_analyzer_detect_functional_mkldnn', 'errors_test', 'test_name_scope', + 'var_type_inference_test', 'test_const_value', + 'test_spawn_and_init_parallel_env', 'test_fleet_gradient_scale', + 'unroll_array_ops_test', 'test_fc_gru_fuse_pass', 'op_version_registry_test', + 'test_cudnn_placement_pass', 'cipher_utils_test', 'test_program_code', + 'test_save_model_without_var', 'program_processing_test', + 'test_fleet_distributed_strategy', 'test_hybrid_parallel_topology', + 'test_ascend_trigger', 'test_fleet_rolemaker_3', + 'test_conv_activation_mkldnn_fuse_pass', 'test_fusion_gru_bf16_mkldnn_op', + 'test_model_cast_to_bf16', 'test_quantize_transpiler', + 'conditional_block_op_test', 'test_fleet_gradient_merge_meta_optimizer', + 'test_graph_pattern_detector', 'test_fleet_fp16_allreduce_meta_optimizer', + 'test_unique_name', 'test_multi_out_jit', 'test_attention_lstm_op', + 'test_mkldnn_quantizer_config', 'data_layout_transform_test', + 'test_conv2d_int8_mkldnn_op', 'test_fusion_seqpool_cvm_concat_op', + 'save_quant2_model_gru', 'test_generator', 'test_sum_mkldnn_op', + 'test_fleet_util', 'test_fleet_dgc_meta_optimizer', + 'selected_rows_functor_test', 'test_default_scope_funcs', + 'test_communicator_sync', 'test_communicator_half_async', + 'test_dynrnn_gradient_check', 'test_pool2d_bf16_mkldnn_op', + 'test_table_printer', 'test_framework_debug_str', 'test_dist_fleet_ps2', + 'test_collective_scatter_api', 'test_dist_sparse_tensor_load_ftrl', + 'test_dist_mnist_dgc_nccl', 'test_dist_oneps', 'test_dist_tree_index', + 'test_dist_fleet_ps', 'test_dist_fleet_a_sync_optimizer_sync', + 'test_dist_fleet_decay', 'test_auto_checkpoint2', + 'test_dist_fleet_heter_ctr', 'test_dist_fleet_simnet', + 'test_dist_sparse_load_ps1', 'test_dist_mnist_fleet_save', + 'test_dist_fleet_ps7', 'test_dist_mnist_fleetapi', + 'test_dist_sparse_tensor_load_adam', 'test_dist_fleet_ps_gpu_ctr', + 'test_dist_mnist_ring_allreduce', 'test_dist_op', 'test_new_group_api', + 'test_dist_fleet_heter_base', 'test_collective_split_col_linear', + 'test_parallel_executor_mnist', 'test_dist_fleet_ctr2', + 'test_dist_fleet_heter_program', 'test_dist_fleet_ctr', + 'test_collective_allreduce_api', 'test_dataloader_unkeep_order', + 'test_dataloader_keep_order', 'test_dist_se_resnext_sync', 'test_hdfs2', + 'test_dist_fleet_ps6', 'test_dist_fleet_a_sync_optimizer_auto_async', + 'test_dist_fleet_a_sync_optimizer_auto', 'test_dist_fleet_ps9', + 'test_dist_fleet_raw_program_optimizer_fuse_allreduce', + 'test_dist_fleet_ps11', 'test_dist_fleet_ps8', + 'test_dist_mnist_fp16_allreduce', 'test_dist_fleet_ps12', + 'test_collective_split_row_linear', 'test_collective_reduce_api', + 'test_multiprocess_dataloader_exception', 'test_collective_allgather_api', + 'test_dist_fleet_ps10', 'test_dist_sparse_tensor_load_rmsprop', + 'test_collective_split_embedding_none_divisible', + 'test_parallel_dygraph_dataparallel', 'test_auto_checkpoint3', + 'test_fleet_graph_execution_meta_optimizer', 'test_auto_checkpoint1', + 'test_dist_fleet_ps3', 'test_dist_mnist_pg', 'test_pipeline_parallel', + 'test_dist_fleet_ps5', 'test_dist_fleet_sparse_embedding_ctr', + 'test_collective_broadcast_api', 'test_fleet_checkpoint', + 'retry_allocator_test', 'test_auto_checkpoint_multiple', + 'test_dist_mnist_backward_deps', 'test_dist_mnist_multi_comm', 'test_hdfs3', + 'test_hdfs1', 'test_dist_allreduce_op', + 'test_parallel_dygraph_sparse_embedding', 'test_dist_se_resnext_dgc', + 'test_dist_sharding_save', 'test_dist_fleet_a_sync_optimizer_async', + 'test_gen_nccl_id_op', 'test_auto_checkpoint', + 'test_collective_split_embedding', + 'test_parallel_dygraph_sparse_embedding_over_height', + 'test_dist_sparse_tensor_load_momentum', 'test_auto_checkpoint_dist_basic', + 'test_dist_fleet_ps4', 'test_collective_alltoall_api', + 'test_dist_fleet_raw_program_optimizer', 'test_parallel_dygraph_mp_layers', + 'test_dist_fleet_geo', 'test_fleet_raw_program_meta_optimizer', + 'test_sync_batch_norm_op', 'test_dist_mnist_batch_merge', + 'test_fleet_launch_ps', 'test_dist_sparse_tensor_load_sgd', + 'test_dist_fleet_a_sync_optimizer_auto_geo', + 'test_dist_lookup_sparse_table_fuse_ops', + 'test_dist_fleet_a_sync_optimizer_geo', + 'test_multiprocess_dataloader_iterable_dataset_static', + 'test_dist_fleet_grad_clip', + 'test_fleet_pipeline_meta_optimizer_with_recompute', + 'test_dist_sparse_load_ps0', 'test_collective_barrier_api', + 'test_fleet_pipeline_meta_optimizer', 'test_parallel_dygraph_mnist', + 'test_dist_sparse_tensor_load_adagrad', 'test_new_group', + 'test_imperative_signal_handler', 'test_parallel_dygraph_sharding_parallel', + 'test_dist_hapi_model', 'test_dist_mnist_gradient_merge' +] + +# mem=0 but always timeout or failed : It run 15 job each time in Single cases; +SECONDARY_HIGH_PARALLEL_JOB_NEW = [ + 'test_dataset_conll05', 'test_conv3d_mkldnn_op', 'test_matrix_nms_op', + 'test_data', 'test_analyzer_paddletensor_tensor', + 'test_linear_chain_crf_op', 'test_analyzer_multi_model_prediction', + 'test_default_dtype', 'device_context_test', 'test_analyzer_googlenet', + 'jit_kernel_test', 'profiler_test', 'preprocess_local_pascalvoc', + 'test_conv2d_transpose_layer', 'test_analyzer_int8_googlenet', + 'test_analyzer_seq_pool1_compare_determine', 'save_quant2_model_ernie', + 'test_parallel_executor_seresnext_with_fuse_all_reduce_cpu', + 'test_dataset_uci_housing', 'test_parallel_executor_seresnext_base_cpu', + 'test_dataset_download', 'test_quant_int8_mobilenetv1_mkldnn', + 'test_crf_decoding_op', 'test_conv3d_transpose_layer', + 'test_quant2_int8_mobilenetv1_mkldnn', 'test_softmax_bf16_mkldnn_op', + 'test_quant2_int8_resnet50_range_mkldnn', 'test_pool2d_mkldnn_op', + 'test_flags_mkldnn_ops_on_off', 'test_c_comm_init_op', + 'test_uniform_random_bf16_op', 'test_custom_concat', + 'test_weight_quantization_mobilenetv1', 'test_retinanet_detection_output', + 'test_concat_mkldnn_op', 'test_gaussian_random_mkldnn_op', + 'test_parallel_executor_seresnext_with_reduce_cpu', 'test_dataset_imikolov', + 'test_analyzer_rnn1', 'test_conv2d_mkldnn_op', 'test_conv3d_layer', + 'test_error_clip', 'selected_rows_test', 'test_static_save_load_large', + 'test_bipartite_match_op', 'test_conv2d_layer', + 'test_analyzer_seq_pool1_fuse_statis', 'test_split_plugin', + 'test_analyzer_small_dam', 'test_analyzer_capi_exp_gpu', + 'test_quant2_int8_resnet50_channelwise_mkldnn', 'test_analyzer_bert', + 'test_directory_migration', 'test_elementwise_add_mkldnn_op', + 'test_quant_int8_googlenet_mkldnn', 'test_callback_early_stop', + 'test_quant2_int8_resnet50_mkldnn' +] + +# mem=0 but always timeout or failed : It run 12 job each time in Single cases; +THIRD_HIGH_PARALLEL_JOB_NEW = [ + 'test_api_impl', 'test_analyzer_seq_pool1_fuse_compare_zero_copy', + 'test_analyzer_seq_pool1_profile', 'test_analyzer_mobilenet_transpose', + 'test_analyzer_resnet50', 'test_analyzer_int8_resnet50', + 'test_analyzer_int8_mobilenetv2', 'test_analyzer_bfloat16_resnet50', + 'test_analyzer_bfloat16_mobilenetv1', 'test_analyzer_int8_mobilenet_ssd', + 'test_dataset_cifar', 'test_dataset_imdb', 'test_dataset_movielens', + 'test_datasets', 'test_allgather', 'test_c_concat', 'test_c_split', + 'test_collective_reduce', 'test_collective_sendrecv', + 'test_collective_wait', 'test_cyclic_cifar_dataset', 'test_dyn_rnn', + 'test_gru_op', 'test_multiclass_nms_op', 'test_communicator_geo', + 'test_quant_int8_mobilenetv2_mkldnn', + 'test_post_training_quantization_mnist', 'test_analyzer_seq_pool1', + 'test_analyzer_transformer', 'test_analyzer_transformer_profile', + 'test_analyzer_int8_mobilenetv1', 'test_analyzer_bfloat16_googlenet', + 'test_analyzer_quant_performance_benchmark', 'test_dataset_wmt', + 'test_allreduce', 'test_broadcast', 'test_c_identity', + 'test_collective_scatter', 'test_collective_sendrecv_api', + 'test_fleet_utils', 'test_fused_elemwise_activation_op', + 'test_group_norm_op', 'test_reducescatter_api', 'test_fleet_launch_nproc', + 'test_quant_int8_resnet50_mkldnn', 'test_quant2_int8_ernie_mkldnn', + 'convert_model2dot_ernie' +] + +# mem != 0: It run 7 job each time in Single cases; 4 job each time in Multi cases; 3 job each time in exclusive cases +TETRAD_PARALLEL_JOB_NEW = [ + 'test_meshgrid_op', 'test_gather_op', 'test_word2vec', 'test_analyzer_ner', + 'test_fetch_lod_tensor_array', 'test_adagrad_op_v2', + 'test_conv2d_fusion_op', 'test_hapi_amp', 'test_metrics', + 'test_clip_by_norm_op', 'test_lr_scheduler', 'test_generate_proposals_op', + 'test_masked_select_op', 'test_trt_anchor_generator_op', + 'test_imperative_ocr_attention_model', 'test_sentiment', 'test_chunk_op', + 'test_memcpy_op', 'test_warpctc_op', 'test_row_conv_op', + 'test_grid_sample_function', 'test_rnn_nets', 'test_pad3d_op', + 'test_imperative_mnist_sorted_gradient', 'tensor_test', + 'test_elementwise_nn_grad', 'test_tensorrt_engine_op', 'test_dot_op', + 'test_real_imag_op', 'test_adam_optimizer_fp32_fp64', 'test_reduce_op', + 'test_density_prior_box_op', 'test_top_k_op', 'test_grid_generator', + 'test_randn_op', 'test_activation_mkldnn_op', 'test_lac', 'test_pad_op', + 'test_lstmp_op', 'test_loop', 'test_pylayer_op', + 'data_device_transform_test', 'test_trt_roi_align_op', + 'test_nn_functional_hot_op', 'test_top_k_v2_op', 'test_crop_op', + 'test_conv_bn_fuse_pass', 'test_beam_search_decode_op', 'test_auc_op', + 'test_pool2d_op', 'test_gaussian_random_op', 'test_maximum_op', + 'test_rnn_cell_api', 'device_code_test', 'test_ir_inplace_pass', + 'test_cos_sim_op', 'test_lite_tensor_utils', 'test_fit_a_line', + 'test_mish_op', 'test_transpose_op', 'test_mean_iou', + 'test_conv3d_transpose_op', 'test_jit_save_load', 'test_unsqueeze2_op', + 'test_eager_deletion_while_op', 'test_zeros_like_op', 'test_c_embedding_op', + 'test_regularizer', 'zero_copy_tensor_test', 'test_tensor_shape', + 'test_resnet', 'test_dygraph_weight_norm', 'test_tracer', 'test_list', + 'test_sequence_concat', 'test_adaptive_avg_pool1d', + 'test_elementwise_div_op', 'test_conv1d_transpose_layer', 'test_adamw_op', + 'trt_fc_prelu_test', 'test_temporal_shift_op', + 'test_naive_best_fit_gpu_memory_limit', 'dlpack_tensor_test', + 'test_elementwise_max_op', 'test_typing', 'test_asp_pruning_2d_greedy', + 'test_fake_dequantize_op', 'test_crop_tensor_op', + 'test_imperative_load_static_param', 'test_imperative_qat_user_defined', + 'test_anchor_generator_op', 'test_if_else_op', 'test_prepare_op', + 'test_conj_op', 'test_imperative_hook_for_layer', 'test_roi_pool_op', + 'test_strided_slice_op', 'test_norm_all', 'test_weight_decay', + 'test_functional_conv2d', 'test_functional_conv3d_transpose', + 'test_imperative_layer_trainable', 'test_imperative_data_parallel', + 'test_digamma_op', 'test_distribution', 'test_box_clip_op', + 'custom_tensor_test', 'test_marker_op', 'test_dataloader_early_reset', + 'test_gather_nd_op', 'test_tensor_register_hook', 'test_retain_graph', + 'test_network_with_dtype', 'test_basic_api_transformation', 'test_diag', + 'test_lod_array_length_op', 'test_reinforcement_learning', + 'test_softmax_op', 'test_fc_fuse_pass', 'test_adaptive_max_pool2d', + 'test_inverse_op', 'test_declarative', 'test_imperative_double_grad', + 'test_tensor_methods', 'test_pool1d_api', 'system_allocator_test', + 'test_print', 'test_tensor_type_promotion', 'test_bce_with_logits_loss', + 'test_tensor', 'test_cross_op', 'concat_test', 'test_ast_util', + 'test_proximal_adagrad_op', 'test_pairwise_distance', + 'test_imperative_mnist', 'test_beam_search_decoder', + 'test_build_strategy_fusion_group_pass', 'test_dygraph_spectral_norm', + 'test_scale_mkldnn_op', 'test_load_state_dict_from_old_format', + 'test_margin_rank_loss_op', 'test_lookup_table_v2_op', + 'test_mix_precision_all_reduce_fuse', 'test_spp_op', 'test_op_converter', + 'mixed_vector_test', 'test_roi_align_op', 'test_pad_constant_like', + 'test_mul_op', 'test_spectral_norm_op', 'test_transformer', + 'test_for_enumerate', 'test_variable_trans_func', + 'test_squared_l2_distance_op', 'test_quantize_transpiler_v2', + 'test_im2sequence_op', 'test_reader_reset', 'test_one_hot_op', + 'test_adaptive_max_pool1d', 'test_label_smooth_op', + 'test_parallel_executor_fetch_feed', 'test_cast', + 'test_parallel_dygraph_sync_batch_norm', 'test_collect_fpn_proposals_op', + 'test_expand_as_v2_op', 'test_device', 'test_code_generator', + 'test_asp_pruning_2d_best', 'test_fleet_with_asp', 'test_pool2d_api', + 'test_mean_op', 'test_is_tensor', 'test_run_program_op', + 'test_cuda_random_seed', 'test_linear_interp_op', + 'test_fuse_all_reduce_pass', 'tensor_util_test', 'test_median', + 'test_linear', 'test_imperative_qat_amp', + 'test_truncated_gaussian_random_op', 'test_lstm_cudnn_op', + 'copy_same_tensor_test', 'test_squeeze2_op', + 'naive_best_fit_allocator_test', 'test_model', 'test_py_reader_combination', + 'test_prior_box_op', 'test_matmul_v2_mkldnn_op', 'test_sum_op', + 'test_paddle_imperative_double_grad', 'test_norm_op', 'test_pool3d_api', + 'test_imperative_gan', 'test_sequence_softmax_op', 'test_rand_op', + 'test_expand_v2_op', 'test_word2vec_book', 'test_histogram_op', + 'test_min_op', 'test_mse_loss', 'test_sign_op', + 'selected_rows_functor_gpu_test', 'test_fleet_base', 'test_logsumexp', + 'test_detection', 'test_image_classification_fp16', 'test_random_seed', + 'test_op_function_generator', 'test_unique_with_counts', + 'test_complex_elementwise_layers', 'test_array_read_write_op', + 'test_fusion_group_op', 'test_imperative_layer_apply', + 'test_executor_return_tensor_not_overwriting', + 'test_optimizer_in_control_flow', 'test_lookup_table_op', 'test_randint_op', + 'test_convert_call', 'test_sigmoid_cross_entropy_with_logits_op', + 'copy_cross_scope_test', 'test_normalization_wrapper', + 'test_pretrained_model', 'test_flip', 'test_cosine_similarity_api', + 'test_cumsum_op', 'test_range', 'test_log_loss_op', 'test_where_index', + 'test_tril_triu_op', 'test_lod_reset_op', 'test_lod_tensor', 'test_addmm_op', + 'test_index_select_op', 'test_nvprof', 'test_index_sample_op', + 'test_unstack_op', 'test_increment', 'strided_memcpy_test', + 'test_target_assign_op', 'test_trt_dynamic_shape_transformer_prune', + 'test_box_decoder_and_assign_op', 'test_trt_dynamic_shape', 'test_mnist', + 'test_convert_operators', 'test_fill_any_like_op', 'test_fill_constant_op', + 'test_callback_reduce_lr_on_plateau', 'test_tile_op', 'test_logical', + 'test_deformable_conv_op', 'test_elementwise_add_grad_grad', + 'test_simple_rnn_op', 'test_bicubic_interp_op', 'test_batch_norm_op_v2', + 'test_trt_slice_plugin', 'test_custom_relu_op_jit', + 'test_math_op_patch_var_base', 'test_se_resnet', 'test_device_guard', + 'test_elementwise_div_grad_grad', 'test_minus_op', 'test_shard_index_op', + 'test_dygraph_recompute', 'test_momentum_op', 'test_trt_nearest_interp_op', + 'test_modelaverage', 'test_compare_reduce_op', 'test_affine_grid_op', + 'test_allclose_layer', 'test_elementwise_pow_op', 'test_trt_subgraph_pass', + 'test_adaptive_avg_pool2d', 'test_functional_conv3d', + 'test_executor_and_mul', 'test_kron_op', 'test_cast_mkldnn_op', + 'test_imperative_auto_prune', 'allocator_facade_frac_flags_test', + 'test_fill_zeros_like_op', 'test_gather_tree_op', 'test_elementwise_mul_op', + 'test_cycle_gan', 'test_parallel_executor_transformer_auto_growth', + 'test_bitwise_op', 'test_uniform_random_op', 'trt_split_converter_test', + 'test_huber_loss_op', 'test_slice', 'test_label_smooth_functional', + 'test_conv_shift_op', 'test_imperative_optimizer_v2', 'test_len', + 'test_imperative_named_members', 'test_sequence_reshape', + 'test_elementwise_min_op', 'test_flatten2_op', 'test_param_guard', + 'test_imperative_ptb_rnn', 'test_batch_fc_op', 'test_Tensor_type', + 'test_complex_getitem', 'lod_tensor_gpu_test', 'im2col_test', + 'test_unbind_op', 'test_imperative_ptq', 'test_auc_single_pred_op', + 'test_imperative_reinforcement', 'test_tf32_cublas', 'test_return', + 'test_py_reader_push_pop', 'test_lstm', 'test_dygraph_mnist_fp16', + 'test_shuffle_channel_op', 'test_partial_concat_op', + 'test_fill_zeros_like2_op', 'test_deformable_conv_v1_op', + 'test_complex_grad_accumulated', 'test_sequence_mask', 'test_fill_op', + 'test_imperative_deepcf', 'test_reorder_lod_tensor', + 'test_cross_entropy_loss', 'test_multiply', 'test_partial_program', + 'test_fetch_feed', 'test_group', 'test_trt_reduce_sum_op', + 'data_type_transform_test', 'test_gru_rnn_op', 'test_cudnn_grucell', + 'test_argsort_op', 'test_batch_norm_op', 'test_inplace', + 'test_deprecated_decorator', 'test_complex_cast', 'test_diag_v2', + 'test_iou_similarity_op', 'test_inplace_auto_generated_apis', 'test_dataset', + 'test_bilinear_api', 'test_empty_like_op', 'test_imperative_layer_children', + 'nccl_op_test', 'test_tree_conv_op', 'test_share_data_op', + 'test_ir_memory_optimize_transformer', 'test_lod_append_op', + 'test_math_op_patch', 'test_base_layer', 'test_dequantize_log_op', + 'test_complex_matmul', 'test_prelu_op', 'test_l1_norm_op', + 'test_rmsprop_op', 'test_fuse_bn_act_pass', 'test_inplace_addto_strategy', + 'test_ptb_lm_v2', 'test_paddle_save_load', 'test_prelu_mkldnn_op', + 'test_box_coder_op', 'test_atan2_op', 'test_unsqueeze_op', 'test_profiler', + 'test_affine_channel_op', 'test_leaky_relu_grad_grad_functor', + 'test_ctc_align', 'test_fuse_relu_depthwise_conv_pass', 'test_complex_kron', + 'test_imperative_skip_op', 'test_dgc_op', 'test_regularizer_api', + 'test_nll_loss', 'test_imperative_layers', 'test_rnn_decode_api', + 'test_imperative_partitial_backward', 'test_where_op', 'test_std_layer', + 'test_ir_embedding_eltwise_layernorm_fuse_pass', 'test_multihead_attention', + 'test_ir_memory_optimize_ifelse_op', 'test_grid_sampler_op', + 'test_initializer_nn', 'test_var_base', 'test_fuse_elewise_add_act_pass', + 'test_select_input_output_op', 'test_lstm_op', 'test_break_continue', + 'test_imperative_parallel_coalesce_split', 'test_expand_as_op', + 'test_user_defined_quantization', 'test_tensor_to_list', + 'test_limit_gpu_memory', 'test_adamax_api', + 'test_softmax_mask_fuse_upper_triangle_op', 'test_fake_quantize_op', + 'vol2col_test', 'test_cast_op', 'test_proximal_gd_op', 'test_mul_nn_grad', + 'test_full_like_op', 'test_imperative_static_runner_while', + 'trt_instance_norm_test', 'test_elementwise_mod_op', + 'test_grad_clip_minimize', 'test_one_hot_v2_op', 'test_complex_sum_layer', + 'test_isfinite_v2_op', 'test_is_empty_op', 'test_simnet_v2', + 'beam_search_test', 'test_randperm_op', 'test_elementwise_add_op_inplace', + 'test_imperative_selected_rows', 'test_py_reader_using_executor', + 'test_activation_op', 'test_nn_functional_embedding_dygraph', + 'test_reshape_op', 'test_maxout_op', 'test_cudnn_lstmcell', + 'test_sigmoid_focal_loss', 'test_manual_seed', 'test_lrn_op', + 'test_ir_memory_optimize_nlp', 'test_dataset_dataloader', + 'test_complex_variable', 'test_lite_engine', 'test_neg_op', + 'test_view_op_reuse_allocation', 'test_split_op', 'test_ptb_lm', + 'test_elementwise_sub_op', 'test_compare_op', 'test_simnet', + 'test_label_semantic_roles', 'test_normal', + 'test_tensor_scalar_type_promotion_static', 'test_trt_group_norm_op', + 'test_learning_rate_scheduler', 'test_numel_op', 'test_adaptive_max_pool3d', + 'test_sequential', 'test_imperative_optimizer', 'test_subtract_op', + 'test_conv_transpose_nn_grad', 'test_sigmoid_focal_loss_op', + 'test_cuda_stream_event', 'test_sequence_pad_op', 'test_rnn_cells', + 'test_partial_sum_op', 'test_rnn_nets_static', 'test_max_op', + 'test_logical_op', 'test_squared_l2_norm_op', 'test_center_loss', + 'test_quantization_pass', 'test_imperative_gnn', + 'test_conv_elementwise_add_act_fuse_pass', 'test_roll_op', + 'test_imperative_container_layerdict', 'test_shape_op', 'test_bmm_op', + 'test_matmul_v2_op', 'test_hinge_loss_op', 'test_imperative_qat', + 'test_add_position_encoding_op', 'test_rnn_op', 'test_gradient_clip', + 'test_py_reader_pin_memory', 'test_concat_op', 'test_weight_decay_extend', + 'test_accuracy_op', 'test_cond', 'test_resnet_v2', 'test_adagrad_op', + 'test_mv_op', 'test_print_op', 'test_grad', 'test_square_error_cost', + 'test_rnn_cells_static', 'test_mkldnn_batch_norm_act_fuse_pass', + 'test_input_spec', 'test_adam_op', 'test_elementwise_floordiv_op', + 'test_eager_deletion_gru_net', 'test_diagonal_op', + 'test_imperative_static_runner_mnist', 'test_nearest_interp_op', + 'test_conv2d_transpose_op', 'test_diag_embed', 'test_imperative_basic', + 'test_merge_selectedrows_op', 'test_feed_data_check_shape_type', + 'test_complex_trace_layer', 'test_slice_op', 'test_bmn', + 'test_nn_quant_functional_layers', 'test_broadcast_tensors_op', + 'test_selu_op', 'test_group_norm_op_v2', 'test_tensor_to_numpy', + 'test_queue', 'test_rank_loss_op', 'test_trace_op', 'test_case', + 'test_prroi_pool_op', 'test_op_name_conflict', 'test_psroi_pool_op', + 'test_set_value_op', 'test_ones_like', 'test_assign_value_op', 'test_ema', + 'test_lamb_op', 'test_dgc_momentum_op', 'test_custom_grad_input', + 'test_trunc_op', 'test_bernoulli_op', 'test_custom_relu_model', + 'test_backward', 'test_conv3d_transpose_part2_op', 'test_complex_transpose', + 'test_memory_reuse_exclude_feed_var', 'test_polygon_box_transform', + 'math_function_gpu_test', 'test_program_prune_backward', + 'test_fleet_amp_init', 'test_normalize', 'test_correlation', + 'test_conv_elementwise_add2_act_fuse_pass', + 'test_imperative_container_layerlist', 'test_dequantize_abs_max_op', + 'test_fuse_optimizer_pass', 'test_optimizer', + 'test_dynamic_rnn_stop_gradient', 'test_raw_program_optimizer', 'test_pow', + 'test_inplace_softmax_with_cross_entropy', 'test_transforms', + 'test_unfold_op', 'test_assign_op', 'test_isinstance', + 'test_conv_affine_channel_fuse_pass', + 'auto_growth_best_fit_allocator_facade_test', 'test_cholesky_op', + 'test_adaptive_avg_pool3d', 'test_paddle_save_load_binary', + 'test_fused_fc_elementwise_layernorm_op', 'test_sequence_enumerate_op', + 'test_lgamma_op', 'test_modified_huber_loss_op', 'trt_quant_int8_test', + 'test_callback_visualdl', 'test_linspace', 'test_update_loss_scaling_op', + 'test_arg_min_max_op', 'test_empty_op', 'test_bce_loss', + 'test_nn_margin_rank_loss', 'test_arg_min_max_v2_op', 'test_variance_layer', + 'test_quantization_scale_pass', 'test_segment_ops', 'test_layers', + 'test_isfinite_op', 'test_imperative_qat_channelwise', 'test_eye_op', + 'test_imperative_framework', 'test_l1_loss', 'test_ifelse', + 'test_cache_program', 'test_ir_fc_fuse_pass', 'test_kldiv_loss_op', + 'test_switch_case', 'test_unique', 'test_prod_op', 'test_edit_distance_op', + 'test_sequence_expand_as', 'test_full_name_usage', 'test_glu', + 'test_pad2d_op', 'test_read_file', 'test_erf_op', 'test_sequence_unpad_op', + 'test_sequence_conv', 'allocator_facade_abs_flags_test', 'test_detach', + 'test_cross_entropy_op', 'test_wrappers', 'test_fleet_base_single', + 'test_conv_elementwise_add_fuse_pass', 'test_auto_growth_gpu_memory_limit', + 'test_sequence_reverse', 'test_fc_op', 'test_diagflat', 'test_adamax_op', + 'test_op_attr', 'paddle_infer_api_test', 'test_mixed_precision', + 'lite_mul_model_test', 'test_sort_op', 'test_scatter_op', + 'test_imperative_out_scale', 'test_vision_models', + 'test_rnn_encoder_decoder', 'test_fleet_with_asp_amp', + 'test_partial_eager_deletion_transformer', + 'test_imperative_star_gan_with_gradient_penalty', 'test_stack_op', + 'test_shuffle_batch_op', 'test_clip_op', 'test_py_func_op', + 'test_pool_max_op', 'test_log_softmax', + 'test_imperative_container_parameterlist', 'test_multiplex_op', + 'test_trt_transpose_flatten_concat_fuse_pass', + 'test_seqconv_eltadd_relu_fuse_pass', 'test_assert_op', 'test_scatter_nd_op', + 'test_sequence_expand', 'test_arange', 'test_translated_layer', + 'test_decoupled_py_reader_data_check', 'test_analyzer_ernie_large', + 'test_tensor_array_to_tensor', 'test_functional_conv2d_transpose', + 'test_error', 'test_callbacks', 'test_imperative_recurrent_usage', + 'test_deform_conv2d', 'test_coalesce_tensor_op', 'test_tsm', + 'test_fused_multihead_matmul_op', 'test_softmax_mask_fuse_op', + 'test_optimizer_grad', 'test_complex_abs', 'test_gradient_accmulator', + 'test_instance_norm_op_v2', 'test_random_crop_op', 'test_mobile_net', + 'test_parallel_executor_transformer', + 'test_tensor_scalar_type_promotion_dynamic', + 'test_eager_deletion_delete_vars', 'test_asp_pruning_1d', + 'test_imperative_auto_mixed_precision', 'test_imperative_using_non_zero_gpu', + 'test_machine_translation', 'test_flatten_op', 'test_onnx_export', + 'test_optimizer_for_varbase', 'test_fusion_transpose_flatten_concat_op', + 'best_fit_allocator_test', 'test_ir_fusion_group_pass', + 'test_trt_quant_conv2d_dequant_fuse_pass', 'test_allclose_op', + 'test_ftrl_op', 'test_elementwise_add_op', 'test_instance_norm_op', + 'test_lambv2_op', 'test_yolo_box_op', 'test_parallel_executor_drop_scope', + 'test_generator_dataloader', 'test_conv2d_transpose_op_depthwise_conv', + 'test_imperative_save_load_v2', 'test_lookahead', + 'test_moving_average_abs_max_scale_op', 'test_roi_perspective_transform_op', + 'test_tensorrt_engine', 'test_affine_grid_function', 'test_nonzero_api', + 'test_ir_memory_optimize_pass', 'test_reduce_mkldnn_op', + 'test_bilinear_interp_op', 'test_cvm_op', 'test_scale_op', 'test_matmul_op', + 'test_sequence_pool', 'test_complex_simplenet', 'test_complex_reshape', + 'test_flatten_contiguous_range_op', 'test_python_operator_overriding', + 'lite_resnet50_test', 'test_sequence_erase_op', + 'test_deformable_psroi_pooling', 'test_multi_precision_fp16_train', + 'test_adam_op_multi_thread', 'test_decoupled_py_reader', + 'test_distribute_fpn_proposals_op', 'transform_test', 'test_nan_inf', + 'test_fuse_bn_add_act_pass', 'test_unpool_op', + 'test_parallel_executor_dry_run', 'test_layer_norm_op_v2', + 'test_embedding_id_stop_gradient', 'test_mkldnn_fc_act_fuse_pass', + 'sequence_pooling_test', 'test_get_tensor_from_selected_rows_op', + 'test_imperative_ptb_rnn_sorted_gradient', 'test_hapi_hub', + 'test_reverse_op', 'test_compiled_program', 'test_lambda', 'test_adadelta_op', + 'test_nn_sigmoid_op', 'test_nearest_interp_v2_op', 'test_sequence_slice_op', + 'test_program_translator', 'test_eager_deletion_lstm_net', 'malloc_test', + 'test_size_op', 'test_analysis_predictor', 'test_recognize_digits', + 'test_parameter', 'test_transpose_flatten_concat_fuse_pass', + 'test_imperative_trace_non_persistable_inputs', 'test_pass_builder', + 'thread_local_allocator_test', 'test_variable', 'test_fsp_op', + 'test_elementwise_gradient_op', 'test_multinomial_op', + 'test_trt_shuffle_channel_detect_pass', 'test_generate_proposals_v2_op', + 'test_graph', 'test_gelu_op', 'test_sample_logits_op', + 'test_weight_normalization', 'test_activation_bf16_mkldnn_op', + 'trt_dynamic_shape_test', 'test_traced_layer_err_msg', 'test_conv1d_layer', + 'test_asp_optimize', 'test_imperative_container_sequential', 'test_bert', + 'test_transformer_api', 'test_linear_interp_v2_op', 'test_pixel_shuffle', + 'test_expand_op', 'test_save_load', 'test_dygraph_multi_forward', + 'test_dropout_op', 'test_while_loop_op', 'float16_gpu_test', 'test_dict', + 'test_bilinear_tensor_product_op', 'test_parallel_executor_pg', 'test_assert', + 'test_smooth_l1_loss_op', 'sequence_padding_test', 'test_analyzer_ernie', + 'test_minimum_op', 'test_yolov3_loss_op', 'test_decayed_adagrad_op', + 'test_split_mkldnn_op', 'test_squeeze_op', 'test_save_inference_model', + 'test_smooth_l1_loss', 'test_bilateral_slice_op', 'test_inplace_abn_op', + 'test_fetch_unmerged', 'test_parallel_executor_feed_persistable_var', + 'test_parallel_executor_fetch_isolated_var', + 'test_parallel_executor_inference_feed_partial_data', + 'test_parallel_executor_seresnext_base_gpu', + 'test_parallel_executor_test_while_train', + 'test_parallel_executor_seresnext_with_fuse_all_reduce_gpu', + 'test_parallel_ssa_graph_inference_feed_partial_data', + 'test_parallel_executor_seresnext_with_reduce_gpu', 'test_data_norm_op', + 'test_install_check', 'graph_node_test', 'trt_quant_int8_yolov3_r50_test', + 'test_trt_dynamic_shape_ernie', 'trt_mobilenet_test', + 'trt_cascade_rcnn_test', 'trt_resnext_test', 'test_activation_nn_grad', + 'test_trt_dynamic_shape_ernie_fp16_ser_deser', 'test_bilinear_interp_v2_op', + 'test_cross_entropy2_op', 'test_conv3d_op', 'test_layer_norm_op', + 'test_pool3d_op', 'test_static_save_load', 'test_trilinear_interp_v2_op', + 'test_trilinear_interp_op', 'test_trt_gather_nd_op', 'test_trt_gather_op', + 'test_trt_flatten_op', 'test_trt_instance_norm_op', 'test_trt_yolo_box_op', + 'test_trt_reshape_op', 'test_trt_reduce_mean_op', 'test_trt_pool_op', + 'test_trt_dynamic_shape_ernie_ser_deser', 'test_trt_elementwise_op', + 'test_trt_affine_channel_op', 'test_trt_conv_pass', + 'test_softmax_with_cross_entropy_op', 'test_trt_matmul', + 'test_trt_fc_fuse_pass', 'test_trt_pad_op', 'test_trt_scale_op', + 'test_trt_activation_pass', 'trt_resnet50_test', + 'test_imperative_lod_tensor_to_selected_rows', 'test_gru_unit_op', + 'test_amp_check_finite_and_scale_op', + 'test_imperative_selected_rows_to_lod_tensor', 'test_imperative_save_load', + 'test_add_reader_dependency', 'test_imperative_transformer_sorted_gradient', + 'test_bicubic_interp_v2_op', 'test_rank_attention_op', 'test_seq2seq', + 'test_space_to_depth_op', 'test_image_classification', + 'test_custom_relu_op_setup', 'test_sgd_op' +] + +# mem != 0 : It run 7 job each time in Single cases; 3 job each time in exclusive cases +TWO_PARALLEL_JOB_NEW = [ + 'test_buffer_shared_memory_reuse_pass', + 'test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass', + 'test_parallel_executor_crf', 'test_multiprocess_reader_exception', + 'buddy_allocator_test', 'test_multiprocess_dataloader_dataset', + 'test_multiprocess_dataloader_dynamic', + 'test_multiprocess_dataloader_static', 'test_imperative_resnet', + 'test_nn_grad', 'test_conv2d_op_depthwise_conv', 'test_yolov3', + 'test_conv_nn_grad', 'test_imperative_data_loader_fds_clear', + 'test_conv2d_op', 'test_imperative_data_loader_base', + 'test_imperative_resnet_sorted_gradient', + 'test_multiprocess_dataloader_iterable_dataset_dynamic', + 'test_imperative_se_resnext', 'test_norm_nn_grad', 'test_conv2d_api' +] # *=======These unittest doesn't occupy GPU memory, just run as CPU unittest=======* # # It run 16 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, @@ -1031,6 +1685,9 @@ TWO_PARALLEL_JOB = [ def main(): cpu_parallel_job = '^job$' + secondary_cpu_parallel_job = '^job$' + third_cpu_parallel_job = '^job$' + tetrad_parallel_job = '^job$' two_parallel_job = '^job$' non_parallel_job = '^job$' @@ -1038,17 +1695,36 @@ def main(): test_cases = sys.argv[1] test_cases = test_cases.split("\n") - for unittest in CPU_PARALLEL_JOB: + if platform.system() == 'Windows': + cpu_parallel_job_list = CPU_PARALLEL_JOB + tetrad_parallel_job_list = TETRAD_PARALLEL_JOB + two_parallel_job_list = TWO_PARALLEL_JOB + else: + cpu_parallel_job_list = HIGH_PARALLEL_JOB_NEW + tetrad_parallel_job_list = TETRAD_PARALLEL_JOB_NEW + two_parallel_job_list = TWO_PARALLEL_JOB_NEW + + for unittest in cpu_parallel_job_list: if unittest in test_cases: cpu_parallel_job = cpu_parallel_job + '|^' + unittest + '$' test_cases.remove(unittest) - for unittest in TETRAD_PARALLEL_JOB: + if platform.system() != 'Windows': + for unittest in SECONDARY_HIGH_PARALLEL_JOB_NEW: + if unittest in test_cases: + secondary_cpu_parallel_job = secondary_cpu_parallel_job + '|^' + unittest + '$' + test_cases.remove(unittest) + for unittest in THIRD_HIGH_PARALLEL_JOB_NEW: + if unittest in test_cases: + third_cpu_parallel_job = third_cpu_parallel_job + '|^' + unittest + '$' + test_cases.remove(unittest) + + for unittest in tetrad_parallel_job_list: if unittest in test_cases: tetrad_parallel_job = tetrad_parallel_job + '|^' + unittest + '$' test_cases.remove(unittest) - for unittest in TWO_PARALLEL_JOB: + for unittest in two_parallel_job_list: if unittest in test_cases: two_parallel_job = two_parallel_job + '|^' + unittest + '$' test_cases.remove(unittest) @@ -1056,8 +1732,14 @@ def main(): for unittest in test_cases: non_parallel_job = non_parallel_job + '|^' + unittest + '$' - print("{};{};{};{}".format(cpu_parallel_job, tetrad_parallel_job, - two_parallel_job, non_parallel_job)) + if platform.system() == 'Windows': + print("{};{};{};{}".format(cpu_parallel_job, tetrad_parallel_job, + two_parallel_job, non_parallel_job)) + else: + print("{};{};{};{};{};{}".format( + cpu_parallel_job, secondary_cpu_parallel_job, + third_cpu_parallel_job, tetrad_parallel_job, two_parallel_job, + non_parallel_job)) if __name__ == '__main__': -- GitLab