From 1223238f734664bc4d902a70d4d622ebd3591836 Mon Sep 17 00:00:00 2001 From: zhangchunle Date: Wed, 17 Nov 2021 14:18:36 +0800 Subject: [PATCH] add ut parallel (#37211) --- paddle/scripts/paddle_build.sh | 80 +- tools/parallel_UT_rule.py | 1277 +++++++++++++++++++++++--------- 2 files changed, 943 insertions(+), 414 deletions(-) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 8df831198e4..b7f6d8a5ada 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -1209,15 +1209,17 @@ set +x single_card_tests_high_parallel='^job$' # cases list which would run 24 job each time with single GPU single_card_tests_secondary_high_parallel='^job$' # cases list which would run 15 job each time with single GPU single_card_tests_third_high_parallel='^job$' # cases list which would run 12 job each time with single GPU - single_card_tests_medium_parallel='^job$' # cases list which would run 7 job each time with single GPU - single_card_tests_non_parallel='^job$' # cases list which would run 2 job each time with single GPU + single_card_tests_forth_high_parallel='^job$' # cases list which would run 7 job each time with single GPU + single_card_tests_fifth_high_parallel='^job$' # cases list which would run 4 job each time with single GPU + single_card_tests_lowest_parallel='^job$' # cases list which would run 2 job each time with single GPU + single_card_tests_non_parallel='^job$' # cases list which would run 4 job each time with single GPU single_card_tests='^job$' # all cases list which would take single GPU multiple_card_tests_medium_parallel='^job$' # cases list which would run 4 job each time with multiple GPUs, most cases would be two GPUs - multiple_card_tests_non_parallel='^job$' # cases list which would run 2 job each time with multiple GPUs, most cases would be two GPUs + multiple_card_tests_non_parallel='^job$' # cases list which would run 3 job each time with multiple GPUs, most cases would be two GPUs - 
exclusive_tests_high_parallel='^job$' # cases list which would run 5 job exclusively(with all GPUs) - exclusive_tests_medium_parallel='^job$' # cases list which would run 3 job exclusively(with all GPUs) + exclusive_tests_high_parallel='^job$' # cases list which would run 7 job exclusively(with all GPUs) + exclusive_tests_medium_parallel='^job$' # cases list which would run 4 job exclusively(with all GPUs) exclusive_tests_non_parallel='^job$' # cases list which would run 2 job exclusively(with all GPUs) is_exclusive='' # indicate whether the case is exclusive type @@ -1227,12 +1229,15 @@ set +x UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d') output=$(python ${PADDLE_ROOT}/tools/parallel_UT_rule.py "${UT_list}") - cpu_parallel_job=$(echo $output | cut -d ";" -f 1) - secondary_cpu_parallel_job=$(echo $output | cut -d ";" -f 2) - third_cpu_parallel_job=$(echo $output | cut -d ";" -f 3) - tetrad_parallel_job=$(echo $output | cut -d ";" -f 4) - two_parallel_job=$(echo $output | cut -d ";" -f 5) - non_parallel_job=$(echo $output | cut -d ";" -f 6) + high_parallel_job=$(echo $output | cut -d ";" -f 1) + secondary_high_parallel_job=$(echo $output | cut -d ";" -f 2) + third_high_parallel_job=$(echo $output | cut -d ";" -f 3) + fourth_high_parallel_job=$(echo $output | cut -d ";" -f 4) + fifth_high_parallel_job=$(echo $output | cut -d ";" -f 5) + sixth_high_parallel_job=$(echo $output | cut -d ";" -f 6) + lowest_high_parallel_job=$(echo $output | cut -d ";" -f 7) + non_parallel_job=$(echo $output | cut -d ";" -f 8) + while read -r line; do if [[ "$line" == "" ]]; then continue @@ -1274,28 +1279,32 @@ set +x fi if [[ "$is_exclusive" != "" ]]; then - if [[ $(echo $cpu_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then + if [[ $(echo $high_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then exclusive_tests_high_parallel="$exclusive_tests_high_parallel|^$testcase$" - elif [[ $(echo $tetrad_parallel_job$two_parallel_job | grep -o "\^$testcase\\$") 
!= "" ]]; then + elif [[ $(echo $fourth_high_parallel_job$fifth_high_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then exclusive_tests_medium_parallel="$exclusive_tests_medium_parallel|^$testcase$" else exclusive_tests_non_parallel="$exclusive_tests_non_parallel|^$testcase$" fi elif [[ "$is_multicard" != "" ]]; then - if [[ $(echo $cpu_parallel_job$tetrad_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then + if [[ $(echo $high_parallel_job$fourth_high_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then multiple_card_tests_medium_parallel="$multiple_card_tests_medium_parallel|^$testcase$" else multiple_card_tests_non_parallel="$multiple_card_tests_non_parallel|^$testcase$" fi else - if [[ $(echo $cpu_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then + if [[ $(echo $high_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then single_card_tests_high_parallel="$single_card_tests_high_parallel|^$testcase$" - elif [[ $(echo $secondary_cpu_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then + elif [[ $(echo $secondary_high_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then single_card_tests_secondary_high_parallel="$single_card_tests_secondary_high_parallel|^$testcase$" - elif [[ $(echo $third_cpu_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then + elif [[ $(echo $third_high_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then single_card_tests_third_high_parallel="$single_card_tests_third_high_parallel|^$testcase$" - elif [[ $(echo $tetrad_parallel_job$two_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then - single_card_tests_medium_parallel="$single_card_tests_medium_parallel|^$testcase$" + elif [[ $(echo $fourth_high_parallel_job$fifth_high_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then + single_card_tests_forth_high_parallel="$single_card_tests_forth_high_parallel|^$testcase$" + elif [[ $(echo $sixth_high_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then + 
single_card_tests_fifth_high_parallel="$single_card_tests_fifth_high_parallel|^$testcase$" + elif [[ $(echo $lowest_high_parallel_job| grep -o "\^$testcase\\$") != "" ]]; then + single_card_tests_lowest_parallel="$single_card_tests_lowest_parallel|^$testcase$" else single_card_tests_non_parallel="$single_card_tests_non_parallel|^$testcase$" fi @@ -1312,41 +1321,24 @@ set +x single_ut_startTime_s=`date +%s` card_test "$single_card_tests_high_parallel" 1 24 # run cases 24 job each time with single GPU - echo "single_card_tests_high_parallel finished!!!" card_test "$single_card_tests_secondary_high_parallel" 1 15 # run cases 15 job each time with single GPU - echo "single_card_tests_secondary_high_parallel finished!!!" card_test "$single_card_tests_third_high_parallel" 1 12 # run cases 12 job each time with single GPU - echo "single_card_tests_third_high_parallel finished!!!" - card_test "$single_card_tests_medium_parallel" 1 7 # run cases 7 job each time with single GPU - echo "single_card_tests_medium_parallel finished!!!" - card_test "$single_card_tests_non_parallel" 1 2 # run cases 2 job each time with single GPU - echo "single_card_tests_non_parallel finished!!!" + card_test "$single_card_tests_forth_high_parallel" 1 7 # run cases 7 job each time with single GPU + card_test "$single_card_tests_fifth_high_parallel" 1 4 # run cases 4 job each time with single GPU + card_test "$single_card_tests_lowest_parallel" 1 2 # run cases 2 job each time with single GPU + card_test "$single_card_tests_non_parallel" 1 4 # run cases 4 job each time with single GPU single_ut_endTime_s=`date +%s` - echo "single_card_tests finished!!!" multi_ut_startTime_s=`date +%s` - echo "multiple_card_tests begined!!!!!" card_test "$multiple_card_tests_medium_parallel" 2 4 # run cases 2 job each time with two GPUs - echo "multiple_card_tests_medium_parallel finished!!!" 
- card_test "$multiple_card_tests_non_parallel" 2 2 # run cases 1 job each time with two GPUs - echo "multiple_card_tests_non_parallel finished!!!" + card_test "$multiple_card_tests_non_parallel" 2 3 # run cases 1 job each time with two GPUs multi_ut_endTime_s=`date +%s` - echo "multiple_card_tests finished!!!" exclu_ut_startTime_s=`date +%s` - echo "exclu_card_tests begined!!!!!" - card_test "$exclusive_tests_high_parallel" -1 5 # run cases exclusively, in this cases would be run with 2/4/8 GPUs - echo "exclusive_tests_high_parallel finished!!!" - card_test "$exclusive_tests_medium_parallel" -1 3 # run cases exclusively, in this cases would be run with 2/4/8 GPUs - echo "exclusive_tests_medium_parallel finished!!!" - card_test "$exclusive_tests_non_parallel" -1 2 # run cases exclusively, in this cases would be run with 2/4/8 GPUs - echo "exclusive_tests_non_parallel finished!!!" + card_test "$exclusive_tests_high_parallel" -1 7 # run cases exclusively, in this cases would be run with 2/4/8 GPUs + card_test "$exclusive_tests_medium_parallel" -1 4 # run cases exclusively, in this cases would be run with 2/4/8 GPUs + card_test "$exclusive_tests_non_parallel" -1 2 # run cases exclusively, in this cases would be run with 2/4/8 GPUs exclu_ut_endTime_s=`date +%s` - echo "exclusive_tests finished!!!" 
- - echo "ipipe_log_param_1aaa_TestCases_Total_Time: $[ $single_ut_endTime_s - $single_ut_startTime_s ]s" - echo "ipipe_log_param_2aaa_TestCases_Total_Time: $[ $multi_ut_endTime_s - $multi_ut_startTime_s ]s" - echo "ipipe_log_param_Exclusiveaaaa_TestCases_Total_Time: $[ $exclu_ut_endTime_s - $exclu_ut_startTime_s ]s" echo "ipipe_log_param_1_TestCases_Total_Time: $[ $single_ut_endTime_s - $single_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt echo "ipipe_log_param_2_TestCases_Total_Time: $[ $multi_ut_endTime_s - $multi_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index 02d6396f9ce..96ffa5ee860 100644 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -18,294 +18,732 @@ import platform # mem=0 : It run 24 job each time in Single cases; 4 job each time in Multi cases; 5 job each time in exclusive cases HIGH_PARALLEL_JOB_NEW = [ - 'mask_util_test', 'test_communicator_ps_gpu', 'preprocess_local_imagenet', - 'test_nearest_interp_v2_mkldnn_op', 'op_call_stack_test', - 'test_fleet_amp_meta_optimizer', 'test_mkldnn_scale_matmul_fuse_pass', - 'bfloat16_gpu_test', 'test_fc_gru_fuse_pass_cc', 'device_worker_test', - 'test_custom_conj', 'save_load_util_test', 'infer_io_utils_tester', - 'test_transpose_bf16_mkldnn_op', 'test_container', 'cpu_helper_test', - 'test_fake_init_op', 'test_concat_int8_mkldnn_op', - 'test_lookup_table_dequant_op', 'test_broadcast_shape', - 'test_program_to_string', 'test_generate_mask_labels_op', - 'test_eager_deletion_dynamic_rnn_base', 'test_global_var_getter_setter', - 'test_ifelse_basic', 'test_get_set_flags', 'dim_test', - 'test_py_reader_return_list', 'test_fleet_meta_optimizer_base', - 'test_py_reader_error_msg', 'scope_test', 'buffered_allocator_test', - 'test_scaled_dot_product_attention', 'prune_test', 'test_chunk_eval_op', - 'test_static_analysis', 'test_fleet_lars_meta_optimizer', - 'heter_server_test', 'test_while_op', - 
'test_runtime_and_compiletime_exception', 'test_precision_recall_op', - 'test_get_inputs_outputs_in_block', 'test_lite_engine_op', + 'mask_util_test', + 'test_communicator_ps_gpu', + 'preprocess_local_imagenet', + 'test_nearest_interp_v2_mkldnn_op', + 'op_call_stack_test', + 'test_fleet_amp_meta_optimizer', + 'test_mkldnn_scale_matmul_fuse_pass', + 'bfloat16_gpu_test', + 'test_fc_gru_fuse_pass_cc', + 'device_worker_test', + 'test_custom_conj', + 'save_load_util_test', + 'infer_io_utils_tester', + 'test_transpose_bf16_mkldnn_op', + 'test_container', + 'cpu_helper_test', + 'test_fake_init_op', + 'test_concat_int8_mkldnn_op', + 'test_lookup_table_dequant_op', + 'test_broadcast_shape', + 'test_program_to_string', + 'test_generate_mask_labels_op', + 'test_eager_deletion_dynamic_rnn_base', + 'test_global_var_getter_setter', + 'test_ifelse_basic', + 'test_get_set_flags', + 'dim_test', + 'test_py_reader_return_list', + 'test_fleet_meta_optimizer_base', + 'test_py_reader_error_msg', + 'scope_test', + 'buffered_allocator_test', + 'test_scaled_dot_product_attention', + 'prune_test', + 'test_chunk_eval_op', + 'test_static_analysis', + 'test_fleet_lars_meta_optimizer', + 'test_while_op', + 'test_runtime_and_compiletime_exception', + 'test_precision_recall_op', + 'test_get_inputs_outputs_in_block', + 'test_lite_engine_op', 'test_repeated_fc_relu_fuse_pass_cc', - 'test_mkldnn_matmul_op_output_fuse_pass', 'cudnn_helper_test', - 'test_check_abi', 'data_type_test', 'test_recurrent_op', 'test_asp_utils', - 'test_paddle_inference_api', 'test_reference_count_pass_last_lived_ops', - 'test_op_support_gpu', 'test_conditional_block', - 'test_fleet_rolemaker_init', 'test_pybind_interface', 'test_io_save_load', - 'test_split_and_merge_lod_tensor_op', 'test_fusion_lstm_int8_mkldnn_op', - 'test_benchmark', 'test_protobuf', 'test_tdm_sampler_op', - 'test_teacher_student_sigmoid_loss_op', 'test_transpose_int8_mkldnn_op', - 'test_transpose_mkldnn_op', 'test_fleet_rolemaker_4', 'to_string_test', - 
'test_c_comm_init_all_op', 'test_bilinear_interp_mkldnn_op', - 'test_split_bf16_mkldnn_op', 'test_cpu_quantize_squash_pass', - 'test_batch_norm_act_fuse_pass', 'test_mkldnn_op_inplace', - 'test_seqpool_concat_fuse_pass', 'test_analyzer_save_model', - 'test_exception', 'test_fc_lstm_fuse_pass', 'test_similarity_focus_op', - 'test_conv_batch_norm_mkldnn_fuse_pass', 'test_sequence_last_step', - 'test_mkldnn_cpu_bfloat16_pass', 'op_debug_string_test', - 'test_quant2_int8_mkldnn_pass', 'test_layer', 'test_sampling_id_op', - 'test_nce', 'graph_helper_test', + 'test_mkldnn_matmul_op_output_fuse_pass', + 'cudnn_helper_test', + 'test_check_abi', + 'data_type_test', + 'test_recurrent_op', + 'test_asp_utils', + 'test_paddle_inference_api', + 'test_reference_count_pass_last_lived_ops', + 'test_op_support_gpu', + 'test_conditional_block', + 'test_fleet_rolemaker_init', + 'test_pybind_interface', + 'test_io_save_load', + 'test_split_and_merge_lod_tensor_op', + 'test_fusion_lstm_int8_mkldnn_op', + 'test_benchmark', + 'test_protobuf', + 'test_tdm_sampler_op', + 'test_teacher_student_sigmoid_loss_op', + 'test_transpose_int8_mkldnn_op', + 'test_transpose_mkldnn_op', + 'test_fleet_rolemaker_4', + 'to_string_test', + 'test_c_comm_init_all_op', + 'test_bilinear_interp_mkldnn_op', + 'test_split_bf16_mkldnn_op', + 'test_cpu_quantize_squash_pass', + 'test_batch_norm_act_fuse_pass', + 'test_mkldnn_op_inplace', + 'test_seqpool_concat_fuse_pass', + 'test_analyzer_save_model', + 'test_exception', + 'test_fc_lstm_fuse_pass', + 'test_similarity_focus_op', + 'test_conv_batch_norm_mkldnn_fuse_pass', + 'test_sequence_last_step', + 'test_mkldnn_cpu_bfloat16_pass', + 'op_debug_string_test', + 'test_quant2_int8_mkldnn_pass', + 'test_layer', + 'test_sampling_id_op', + 'test_nce', + 'graph_helper_test', 'test_static_shape_inferrence_for_shape_tensor', - 'test_layer_norm_mkldnn_op', 'test_fleet_launch_async', - 'test_multi_gru_fuse_pass', 'test_hash_op', 'test_rpn_target_assign_op', - 
'test_concat_bf16_mkldnn_op', 'test_fc_lstm_fuse_pass_cc', 'test_version', - 'gather_test', 'test_mkldnn_inplace_fuse_pass', 'test_reshape_bf16_op', - 'test_compat', 'test_data_feeder', 'cpu_vec_test', - 'test_distributed_strategy', 'test_hsigmoid_op', 'test_hooks', - 'test_fleet_base_2', 'op_kernel_type_test', - 'test_layer_norm_bf16_mkldnn_op', 'test_fleetrun', 'cpu_info_test', - 'brpc_utils_test', 'test_fusion_seqexpand_concat_fc_op', 'test_dataset_voc', - 'test_analyzer_capi_exp_int', 'test_post_training_quantization_resnet50', - 'cuda_helper_test', 'test_conv_concat_relu_mkldnn_fuse_pass', - 'test_bf16_utils', 'test_sum_bf16_mkldnn_op', - 'test_unsqueeze2_eltwise_fuse_pass', 'dense_table_test', - 'test_collective_optimizer', 'test_origin_info', 'test_dgc_optimizer', - 'test_avoid_twice_initialization', 'test_reduce_bf16_mkldnn_op', - 'test_mkldnn_conv_bias_fuse_pass', 'cow_ptr_tests', 'eigen_test', - 'reader_blocking_queue_test', 'test_fusion_gru_op', 'operator_test', - 'test_fusion_gru_int8_mkldnn_op', 'test_cpu_bfloat16_pass', - 'test_multiprocess_dataloader_iterable_dataset_split', 'test_scope', - 'test_analyzer_bfloat16_mobilenetv2', 'test_fleet_rolemaker_2', - 'float16_test', 'test_dpsgd_op', - 'test_conv_elementwise_add_mkldnn_fuse_pass', 'test_crypto', - 'test_sgd_op_bf16', 'test_analyzer_capi_exp_ner', - 'lite_subgraph_pass_tester', 'test_tf32_cudnn', 'threadpool_test', - 'test_cpu_quantize_pass', 'test_analyzer_capi_exp_pd_tensor', 'tuple_test', - 'test_analyzer_lac', 'test_prune', 'test_bilinear_interp_v2_mkldnn_op', - 'test_lod_tensor_array', 'test_logging_utils', 'test_fleet_nocvm_1', - 'stringprintf_test', 'test_nearest_interp_mkldnn_op', - 'test_matmul_mkldnn_op', 'test_debugger', 'test_custom_attrs_jit', - 'test_lrn_mkldnn_op', 'test_set_bool_attr', 'version_test', - 'test_broadcast_to_op', 'test_squared_mat_sub_fuse_pass', - 'test_fleet_ascend_utils', 'test_layer_norm_fuse_pass', - 'test_fused_emb_seq_pool_op', 
'test_imperative_data_loader_exit_func', - 'test_feed_fetch_method', 'test_protobuf_descs', 'test_fleet_unitaccessor', - 'test_sequence_scatter_op', 'test_skip_layernorm_fuse_pass', - 'test_fs_interface', 'test_gast_with_compatibility', - 'test_repeated_fc_relu_fuse_pass', 'timer_test', 'var_type_traits_test', - 'test_py_reader_sample_generator', 'test_conv2d_transpose_mkldnn_op', - 'test_fleet_runtime', 'test_rnn_cudnn_params_packing', - 'test_mkldnn_placement_pass', 'test_fc_elementwise_layernorm_fuse_pass', - 'program_desc_test', 'test_simplify_with_basic_ops_pass', - 'test_dygraph_mode_of_unittest', 'gather_op_test', 'test_trainer_desc', - 'test_matmul_bf16_mkldnn_op', 'test_analyzer_seq_conv1', - 'test_fused_embedding_fc_lstm_op', 'test_conv2d_transpose_bf16_mkldnn_op', - 'check_reduce_rank_test', 'test_progressbar', 'test_seed_op', - 'test_shrink_rnn_memory', 'test_fc_bf16_mkldnn_op', - 'test_sequence_first_step', 'test_layer_norm_fuse_pass_cc', - 'test_fusion_lstm_mkldnn_op', 'test_elementwise_add_bf16_mkldnn_op', - 'test_static_save_load_bf16', 'test_elementwise_mul_bf16_mkldnn_op', - 'test_distributions', 'operator_exception_test', 'dropout_op_test', - 'test_gpu_package_without_gpu_device', 'test_detection_map_op', - 'test_zeros_op', 'test_launch_coverage', - 'test_mkldnn_conv_activation_fuse_pass', 'test_inference_model_io', - 'heter_listen_and_server_test', 'test_fusion_repeated_fc_relu_op', - 'cudnn_desc_test', 'test_beam_search_op', 'test_var_conv_2d', - 'test_listen_and_serv_op', 'test_dequantize_mkldnn_op', - 'test_analyzer_capi_exp_pd_threads', 'test_selected_rows', - 'test_fleet_sharding_meta_optimizer', 'test_inference_api', - 'test_mkldnn_inplace_pass', 'test_data_generator', - 'test_deprecated_memory_optimize_interfaces', 'test_ir_skip_layernorm_pass', - 'broadcast_op_test', 'test_multihead_matmul_fuse_pass', - 'test_lookup_table_bf16_op', 'test_positive_negative_pair_op', 'init_test', - 'test_tensorrt', 'test_check_error', 'test_program', 
'mmap_allocator_test', - 'test_reshape_transpose_matmul_mkldnn_fuse_pass', 'test_communicator_async', - 'test_downpoursgd', 'variable_test', 'test_quantization_mkldnn_pass', - 'test_quantize_mkldnn_op', 'test_create_op_doc_string', - 'test_analyzer_lexical_gru_bfloat16', 'test_imperative_data_loader_process', - 'assign_op_test', 'test_analyzer_capi_exp_xpu', 'test_conv_bn_fuse_pass_cc', - 'test_recommender_system', 'test_ones_op', 'test_fc_mkldnn_op', - 'test_load_op_xpu', 'test_pool2d_int8_mkldnn_op', 'test_mul_int8_mkldnn_op', - 'test_scale_matmul_fuse_pass', 'test_fleet_graph_executor', 'decorator_test', - 'test_collective_base', 'test_lod_rank_table', 'test_multi_gru_mkldnn_op', - 'test_eager_deletion_conditional_block', 'op_proto_maker_test', - 'test_mkldnn_op_nhwc', 'test_fc_act_mkldnn_fuse_pass', 'test_fleet_base_3', - 'test_basic_rnn_name', 'test_query_op', 'test_fleet_base_4', - 'save_load_op_test', 'test_batch_sampler', - 'test_image_classification_layer', 'test_fusion_gru_mkldnn_op', - 'graph_test', 'test_ir_graph', 'test_hapi_hub_model', - 'test_requantize_mkldnn_op', 'test_depthwise_conv_mkldnn_pass', - 'test_fleet_metric', 'test_fc_fuse_pass_cc', 'test_fleet_private_function', - 'test_fleet', 'test_executor_check_feed', 'test_py_reader_lod_level_share', - 'nccl_context_test', 'inlined_vector_test', - 'test_generate_proposal_labels_op', 'test_analyzer_capi_exp_pd_config', - 'test_locality_aware_nms_op', 'test_imperative_decorator', - 'test_npair_loss_op', 'test_ps_dispatcher', 'test_analyzer_rnn2', - 'test_multi_gru_seq_fuse_pass', 'test_filter_by_instag_op', 'test_switch', - 'test_matmul_transpose_reshape_fuse_pass', 'test_mkldnn_caching', - 'test_fetch_var', 'op_compatible_info_test', 'complex_test', - 'test_fleet_static_mp_layers', 'test_aligned_allocator', - 'test_analyzer_transformer_fuse', 'test_sequence_topk_avg_pooling', - 'test_analyzer_lexical_gru', 'test_broadcast_error', 'test_context_manager', - 'test_registry', 
'brpc_service_sparse_sgd_test', 'test_operator', - 'test_mkldnn_conv_concat_relu_mkldnn_fuse_pass', 'test_collective_api_base', - 'test_entry_attr', 'test_get_places_op', 'test_softmax_mkldnn_op', - 'test_dynrnn_static_input', 'auto_growth_best_fit_allocator_test', - 'test_batch_norm_mkldnn_op', 'test_bpr_loss_op', - 'no_need_buffer_vars_inference_test', 'test_fleet_cc', 'test_download', - 'test_fleet_recompute_meta_optimizer', 'test_seqpool_cvm_concat_fuse_pass', - 'test_common_infer_shape_functions', 'test_fusion_seqpool_concat_op', - 'test_op_compat_sensible_pass', 'test_fs', 'test_fc_rnn_mkldnn_fuse_pass', - 'split_test', 'test_fusion_group_pass', 'test_fusion_lstm_bf16_mkldnn_op', - 'test_executor_feed_non_tensor', 'test_var_info', 'test_reducescatter', - 'test_fleet_ps', 'test_check_import_scipy', 'test_load_vars_shape_check', - 'test_nn_functional_embedding_static', 'test_fleet_rolemaker_new', - 'test_imperative_base', 'dist_multi_trainer_test', - 'test_mine_hard_examples_op', 'test_post_training_quantization_lstm_model', - 'aes_cipher_test', 'test_analyzer_zerocopytensor_tensor', 'rw_lock_test', - 'exception_holder_test', 'enforce_test', 'test_rnn_memory_helper_op', - 'ddim_test', 'test_eager_deletion_padding_rnn', 'test_is_test_pass', - 'test_fusion_seqconv_eltadd_relu_op', 'test_fleet_localsgd_meta_optimizer', - 'node_test', 'test_analyzer_text_classification', - 'test_seq_concat_fc_fuse_pass', 'test_imperative_numpy_bridge', - 'test_adaptive_pool2d_convert_global_pass', 'test_lookup_table_v2_bf16_op', - 'test_operator_desc', 'test_elementwise_mul_mkldnn_op', - 'test_fetch_handler', 'test_cpu_bfloat16_placement_pass', - 'test_match_matrix_tensor_op', 'test_fleet_run_random_port', + 'test_layer_norm_mkldnn_op', + 'test_fleet_launch_async', + 'test_multi_gru_fuse_pass', + 'test_hash_op', + 'test_rpn_target_assign_op', + 'test_concat_bf16_mkldnn_op', + 'test_fc_lstm_fuse_pass_cc', + 'test_version', + 'gather_test', + 'test_mkldnn_inplace_fuse_pass', + 
'test_reshape_bf16_op', + 'test_compat', + 'test_data_feeder', + 'cpu_vec_test', + 'test_distributed_strategy', + 'test_hsigmoid_op', + 'test_hooks', + 'test_fleet_base_2', + 'op_kernel_type_test', + 'test_layer_norm_bf16_mkldnn_op', + 'test_fleetrun', + 'cpu_info_test', + 'brpc_utils_test', + 'test_fusion_seqexpand_concat_fc_op', + 'test_dataset_voc', + 'test_analyzer_capi_exp_int', + 'test_post_training_quantization_resnet50', + 'cuda_helper_test', + 'test_conv_concat_relu_mkldnn_fuse_pass', + 'test_bf16_utils', + 'test_sum_bf16_mkldnn_op', + 'test_unsqueeze2_eltwise_fuse_pass', + 'dense_table_test', + 'test_collective_optimizer', + 'test_origin_info', + 'test_dgc_optimizer', + 'test_avoid_twice_initialization', + 'test_reduce_bf16_mkldnn_op', + 'test_mkldnn_conv_bias_fuse_pass', + 'cow_ptr_tests', + 'eigen_test', + 'reader_blocking_queue_test', + 'test_fusion_gru_op', + 'operator_test', + 'test_fusion_gru_int8_mkldnn_op', + 'test_cpu_bfloat16_pass', + 'test_multiprocess_dataloader_iterable_dataset_split', + 'test_scope', + 'test_analyzer_bfloat16_mobilenetv2', + 'test_fleet_rolemaker_2', + 'float16_test', + 'test_dpsgd_op', + 'test_conv_elementwise_add_mkldnn_fuse_pass', + 'test_crypto', + 'test_sgd_op_bf16', + 'test_analyzer_capi_exp_ner', + 'lite_subgraph_pass_tester', + 'test_tf32_cudnn', + 'threadpool_test', + 'test_cpu_quantize_pass', + 'test_analyzer_capi_exp_pd_tensor', + 'tuple_test', + 'test_analyzer_lac', + 'test_prune', + 'test_bilinear_interp_v2_mkldnn_op', + 'test_lod_tensor_array', + 'test_logging_utils', + 'test_fleet_nocvm_1', + 'stringprintf_test', + 'test_nearest_interp_mkldnn_op', + 'test_matmul_mkldnn_op', + 'test_debugger', + 'test_custom_attrs_jit', + 'test_lrn_mkldnn_op', + 'test_set_bool_attr', + 'version_test', + 'test_broadcast_to_op', + 'test_squared_mat_sub_fuse_pass', + 'test_fleet_ascend_utils', + 'test_layer_norm_fuse_pass', + 'test_fused_emb_seq_pool_op', + 'test_imperative_data_loader_exit_func', + 'test_feed_fetch_method', + 
'test_protobuf_descs', + 'test_fleet_unitaccessor', + 'test_sequence_scatter_op', + 'test_skip_layernorm_fuse_pass', + 'test_fs_interface', + 'test_gast_with_compatibility', + 'test_repeated_fc_relu_fuse_pass', + 'timer_test', + 'var_type_traits_test', + 'test_py_reader_sample_generator', + 'test_conv2d_transpose_mkldnn_op', + 'test_fleet_runtime', + 'test_rnn_cudnn_params_packing', + 'test_mkldnn_placement_pass', + 'test_fc_elementwise_layernorm_fuse_pass', + 'program_desc_test', + 'test_simplify_with_basic_ops_pass', + 'test_dygraph_mode_of_unittest', + 'gather_op_test', + 'test_trainer_desc', + 'test_matmul_bf16_mkldnn_op', + 'test_analyzer_seq_conv1', + 'test_fused_embedding_fc_lstm_op', + 'test_conv2d_transpose_bf16_mkldnn_op', + 'check_reduce_rank_test', + 'test_progressbar', + 'test_seed_op', + 'test_shrink_rnn_memory', + 'test_fc_bf16_mkldnn_op', + 'test_sequence_first_step', + 'test_layer_norm_fuse_pass_cc', + 'test_fusion_lstm_mkldnn_op', + 'test_elementwise_add_bf16_mkldnn_op', + 'test_static_save_load_bf16', + 'test_elementwise_mul_bf16_mkldnn_op', + 'test_distributions', + 'operator_exception_test', + 'dropout_op_test', + 'test_gpu_package_without_gpu_device', + 'test_detection_map_op', + 'test_zeros_op', + 'test_launch_coverage', + 'test_mkldnn_conv_activation_fuse_pass', + 'test_inference_model_io', + 'test_fusion_repeated_fc_relu_op', #'heter_listen_and_server_test', + 'cudnn_desc_test', + 'test_beam_search_op', + 'test_var_conv_2d', + 'test_listen_and_serv_op', + 'test_dequantize_mkldnn_op', + 'test_analyzer_capi_exp_pd_threads', + 'test_selected_rows', + 'test_fleet_sharding_meta_optimizer', + 'test_inference_api', + 'test_mkldnn_inplace_pass', + 'test_data_generator', + 'test_deprecated_memory_optimize_interfaces', + 'test_ir_skip_layernorm_pass', + 'broadcast_op_test', + 'test_multihead_matmul_fuse_pass', + 'test_lookup_table_bf16_op', + 'test_positive_negative_pair_op', + 'init_test', + 'test_tensorrt', + 'test_check_error', + 'test_program', + 
'mmap_allocator_test', + 'test_reshape_transpose_matmul_mkldnn_fuse_pass', + 'test_communicator_async', + 'test_downpoursgd', + 'variable_test', + 'test_quantization_mkldnn_pass', + 'test_quantize_mkldnn_op', + 'test_create_op_doc_string', + 'test_analyzer_lexical_gru_bfloat16', + 'test_imperative_data_loader_process', + 'assign_op_test', + 'test_analyzer_capi_exp_xpu', + 'test_conv_bn_fuse_pass_cc', + 'test_recommender_system', + 'test_ones_op', + 'test_fc_mkldnn_op', + 'test_load_op_xpu', + 'test_pool2d_int8_mkldnn_op', + 'test_mul_int8_mkldnn_op', + 'test_scale_matmul_fuse_pass', + 'test_fleet_graph_executor', + 'decorator_test', + 'test_collective_base', + 'test_lod_rank_table', + 'test_multi_gru_mkldnn_op', + 'test_eager_deletion_conditional_block', + 'op_proto_maker_test', + 'test_mkldnn_op_nhwc', + 'test_fc_act_mkldnn_fuse_pass', + 'test_fleet_base_3', + 'test_basic_rnn_name', + 'test_query_op', + 'test_fleet_base_4', + 'save_load_op_test', + 'test_batch_sampler', + 'test_image_classification_layer', + 'test_fusion_gru_mkldnn_op', + 'graph_test', + 'test_ir_graph', + 'test_hapi_hub_model', + 'test_requantize_mkldnn_op', + 'test_depthwise_conv_mkldnn_pass', + 'test_fleet_metric', + 'test_fc_fuse_pass_cc', + 'test_fleet_private_function', + 'test_fleet', + 'test_executor_check_feed', + 'test_py_reader_lod_level_share', + 'nccl_context_test', + 'inlined_vector_test', + 'test_generate_proposal_labels_op', + 'test_analyzer_capi_exp_pd_config', + 'test_locality_aware_nms_op', + 'test_imperative_decorator', + 'test_npair_loss_op', + 'test_ps_dispatcher', + 'test_analyzer_rnn2', + 'test_multi_gru_seq_fuse_pass', + 'test_filter_by_instag_op', + 'test_switch', + 'test_matmul_transpose_reshape_fuse_pass', + 'test_mkldnn_caching', + 'test_fetch_var', + 'op_compatible_info_test', + 'complex_test', + 'test_fleet_static_mp_layers', + 'test_aligned_allocator', + 'test_analyzer_transformer_fuse', + 'test_sequence_topk_avg_pooling', + 'test_analyzer_lexical_gru', + 
'test_broadcast_error', + 'test_context_manager', + 'test_registry', + 'brpc_service_sparse_sgd_test', + 'test_operator', + 'test_mkldnn_conv_concat_relu_mkldnn_fuse_pass', + 'test_collective_api_base', + 'test_entry_attr', + 'test_get_places_op', + 'test_softmax_mkldnn_op', + 'test_dynrnn_static_input', + 'auto_growth_best_fit_allocator_test', + 'test_batch_norm_mkldnn_op', + 'test_bpr_loss_op', + 'no_need_buffer_vars_inference_test', + 'test_fleet_cc', + 'test_download', + 'test_fleet_recompute_meta_optimizer', + 'test_seqpool_cvm_concat_fuse_pass', + 'test_common_infer_shape_functions', + 'test_fusion_seqpool_concat_op', + 'test_op_compat_sensible_pass', + 'test_fs', + 'test_fc_rnn_mkldnn_fuse_pass', + 'split_test', + 'test_fusion_group_pass', + 'test_fusion_lstm_bf16_mkldnn_op', + 'test_executor_feed_non_tensor', + 'test_var_info', + 'test_reducescatter', + 'test_fleet_ps', + 'test_check_import_scipy', + 'test_load_vars_shape_check', + 'test_nn_functional_embedding_static', + 'test_fleet_rolemaker_new', + 'test_imperative_base', + 'dist_multi_trainer_test', + 'test_mine_hard_examples_op', + 'test_post_training_quantization_lstm_model', + 'aes_cipher_test', + 'test_analyzer_zerocopytensor_tensor', + 'rw_lock_test', + 'exception_holder_test', + 'enforce_test', + 'test_rnn_memory_helper_op', + 'ddim_test', + 'test_eager_deletion_padding_rnn', + 'test_is_test_pass', + 'test_fusion_seqconv_eltadd_relu_op', + 'test_fleet_localsgd_meta_optimizer', + 'node_test', + 'test_analyzer_text_classification', + 'test_seq_concat_fc_fuse_pass', + 'test_imperative_numpy_bridge', + 'test_adaptive_pool2d_convert_global_pass', + 'test_lookup_table_v2_bf16_op', + 'test_operator_desc', + 'test_elementwise_mul_mkldnn_op', + 'test_fetch_handler', + 'test_cpu_bfloat16_placement_pass', + 'test_match_matrix_tensor_op', + 'test_fleet_run_random_port', 'test_mkldnn_matmul_transpose_reshape_fuse_pass', - 'test_fleet_lamb_meta_optimizer', 'test_op_version', - 'fused_broadcast_op_test', 
'stringpiece_test', 'test_tdm_child_op', - 'test_imperative_group', 'test_analyzer_capi_exp', - 'test_post_training_quantization_mobilenetv1', 'test_load_op', - 'test_executor_and_use_program_cache', 'op_registry_test', - 'test_create_global_var', 'test_dispatch_jit', 'table_test', 'test_full_op', - 'test_recv_save_op', 'test_fusion_lstm_op', - 'test_eager_deletion_recurrent_op', 'brpc_service_dense_sgd_test', - 'op_tester', 'test_eager_deletion_mnist', 'test_infer_shape', - 'test_fleet_rolemaker', 'test_entry_attr2', 'test_monitor', - 'test_require_version', 'test_function_spec', 'test_image', - 'lod_tensor_test', 'place_test', 'test_fleet_launch_cloud', + 'test_fleet_lamb_meta_optimizer', + 'test_op_version', + 'fused_broadcast_op_test', + 'stringpiece_test', + 'test_tdm_child_op', + 'test_imperative_group', + 'test_analyzer_capi_exp', + 'test_post_training_quantization_mobilenetv1', + 'test_load_op', + 'test_executor_and_use_program_cache', + 'op_registry_test', + 'test_create_global_var', + 'test_dispatch_jit', + 'table_test', + 'test_full_op', + 'test_recv_save_op', + 'test_fusion_lstm_op', + 'test_eager_deletion_recurrent_op', + 'brpc_service_dense_sgd_test', + 'op_tester', + 'test_eager_deletion_mnist', + 'test_infer_shape', + 'test_fleet_rolemaker', + 'test_entry_attr2', + 'test_monitor', + 'test_require_version', + 'test_function_spec', + 'test_image', + 'lod_tensor_test', + 'place_test', + 'test_fleet_launch_cloud', 'test_conv2d_bf16_mkldnn_op', - 'test_parallel_executor_run_load_infer_program', 'scatter_test', - 'graph_to_program_pass_test', 'test_lod_tensor_array_ops', - 'test_embedding_eltwise_layernorm_fuse_pass', 'complex_gpu_test', - 'save_load_combine_op_test', 'test_logger', 'test_analyzer', 'test_utils', - 'barrier_table_test', 'test_memory_usage', 'test_sysconfig', 'reader_test', - 'test_conv_bias_mkldnn_fuse_pass', 'math_function_test', - 'beam_search_decode_op_test', 'save_quant2_model_resnet50', 'bfloat16_test', - 'test_scale_bf16_mkldnn_op', 
'test_fp16_utils', - 'test_cpu_quantize_placement_pass', 'test_slice_var', 'test_analyzer_ocr', - 'test_flags_use_mkldnn', 'pass_test', 'test_trainable', - 'test_sync_batch_norm_pass', 'lodtensor_printer_test', 'test_calc_gradient', - 'test_create_parameter', 'test_infer_no_need_buffer_slots', - 'test_run_fluid_by_module_or_command_line', 'test_boxps', - 'test_initializer', 'test_fusion_squared_mat_sub_op', 'test_desc_clone', - 'test_analyzer_mobilenet_depthwise_conv', 'test_analyzer_pyramid_dnn', - 'test_analyzer_detect_functional_mkldnn', 'errors_test', 'test_name_scope', - 'var_type_inference_test', 'test_const_value', - 'test_spawn_and_init_parallel_env', 'test_fleet_gradient_scale', - 'unroll_array_ops_test', 'test_fc_gru_fuse_pass', 'op_version_registry_test', - 'test_cudnn_placement_pass', 'cipher_utils_test', 'test_program_code', - 'test_save_model_without_var', 'program_processing_test', - 'test_fleet_distributed_strategy', 'test_hybrid_parallel_topology', - 'test_ascend_trigger', 'test_fleet_rolemaker_3', - 'test_conv_activation_mkldnn_fuse_pass', 'test_fusion_gru_bf16_mkldnn_op', - 'test_model_cast_to_bf16', 'test_quantize_transpiler', - 'conditional_block_op_test', 'test_fleet_gradient_merge_meta_optimizer', - 'test_graph_pattern_detector', 'test_fleet_fp16_allreduce_meta_optimizer', - 'test_unique_name', 'test_multi_out_jit', 'test_attention_lstm_op', - 'test_mkldnn_quantizer_config', 'data_layout_transform_test', - 'test_conv2d_int8_mkldnn_op', 'test_fusion_seqpool_cvm_concat_op', - 'save_quant2_model_gru', 'test_generator', 'test_sum_mkldnn_op', - 'test_fleet_util', 'test_fleet_dgc_meta_optimizer', - 'selected_rows_functor_test', 'test_default_scope_funcs', - 'test_communicator_sync', 'test_communicator_half_async', - 'test_dynrnn_gradient_check', 'test_pool2d_bf16_mkldnn_op', - 'test_table_printer', 'test_framework_debug_str', 'test_dist_fleet_ps2', - 'test_collective_scatter_api', 'test_dist_sparse_tensor_load_ftrl', - 'test_dist_mnist_dgc_nccl', 
'test_dist_oneps', 'test_dist_tree_index', - 'test_dist_fleet_ps', 'test_dist_fleet_a_sync_optimizer_sync', - 'test_dist_fleet_decay', 'test_auto_checkpoint2', - 'test_dist_fleet_heter_ctr', 'test_dist_fleet_simnet', - 'test_dist_sparse_load_ps1', 'test_dist_mnist_fleet_save', - 'test_dist_fleet_ps7', 'test_dist_mnist_fleetapi', - 'test_dist_sparse_tensor_load_adam', 'test_dist_fleet_ps_gpu_ctr', - 'test_dist_mnist_ring_allreduce', 'test_dist_op', 'test_new_group_api', - 'test_dist_fleet_heter_base', 'test_collective_split_col_linear', - 'test_parallel_executor_mnist', 'test_dist_fleet_ctr2', - 'test_dist_fleet_heter_program', 'test_dist_fleet_ctr', - 'test_collective_allreduce_api', 'test_dataloader_unkeep_order', - 'test_dataloader_keep_order', 'test_dist_se_resnext_sync', 'test_hdfs2', - 'test_dist_fleet_ps6', 'test_dist_fleet_a_sync_optimizer_auto_async', - 'test_dist_fleet_a_sync_optimizer_auto', 'test_dist_fleet_ps9', + 'test_parallel_executor_run_load_infer_program', + 'scatter_test', + 'graph_to_program_pass_test', + 'test_lod_tensor_array_ops', + 'test_embedding_eltwise_layernorm_fuse_pass', + 'complex_gpu_test', + 'save_load_combine_op_test', + 'test_logger', + 'test_analyzer', + 'test_utils', + 'barrier_table_test', + 'test_memory_usage', + 'test_sysconfig', + 'reader_test', + 'test_conv_bias_mkldnn_fuse_pass', + 'math_function_test', + 'beam_search_decode_op_test', + 'save_quant2_model_resnet50', + 'bfloat16_test', + 'test_scale_bf16_mkldnn_op', + 'test_fp16_utils', + 'test_cpu_quantize_placement_pass', + 'test_slice_var', + 'test_analyzer_ocr', + 'test_flags_use_mkldnn', + 'pass_test', + 'test_trainable', + 'test_sync_batch_norm_pass', + 'lodtensor_printer_test', + 'test_calc_gradient', + 'test_create_parameter', + 'test_infer_no_need_buffer_slots', + 'test_run_fluid_by_module_or_command_line', + 'test_boxps', + 'test_initializer', + 'test_fusion_squared_mat_sub_op', + 'test_desc_clone', + 'test_analyzer_mobilenet_depthwise_conv', + 
'test_analyzer_pyramid_dnn', + 'test_analyzer_detect_functional_mkldnn', + 'errors_test', + 'test_name_scope', + 'var_type_inference_test', + 'test_const_value', + 'test_spawn_and_init_parallel_env', + 'test_fleet_gradient_scale', + 'unroll_array_ops_test', + 'test_fc_gru_fuse_pass', + 'op_version_registry_test', + 'test_cudnn_placement_pass', + 'cipher_utils_test', + 'test_program_code', + 'test_save_model_without_var', + 'program_processing_test', + 'test_fleet_distributed_strategy', + 'test_hybrid_parallel_topology', + 'test_ascend_trigger', + 'test_fleet_rolemaker_3', + 'test_conv_activation_mkldnn_fuse_pass', + 'test_fusion_gru_bf16_mkldnn_op', + 'test_model_cast_to_bf16', + 'test_quantize_transpiler', + 'conditional_block_op_test', + 'test_fleet_gradient_merge_meta_optimizer', + 'test_graph_pattern_detector', + 'test_fleet_fp16_allreduce_meta_optimizer', + 'test_unique_name', + 'test_multi_out_jit', + 'test_attention_lstm_op', + 'test_mkldnn_quantizer_config', + 'data_layout_transform_test', + 'test_conv2d_int8_mkldnn_op', + 'test_fusion_seqpool_cvm_concat_op', + 'save_quant2_model_gru', + 'test_generator', + 'test_sum_mkldnn_op', + 'test_fleet_util', + 'test_fleet_dgc_meta_optimizer', + 'selected_rows_functor_test', + 'test_default_scope_funcs', + 'test_communicator_sync', + 'test_communicator_half_async', + 'test_dynrnn_gradient_check', + 'test_pool2d_bf16_mkldnn_op', + 'test_table_printer', + 'test_framework_debug_str', + 'test_dist_fleet_ps2', + 'test_collective_scatter_api', + 'test_dist_sparse_tensor_load_ftrl', + 'test_dist_mnist_dgc_nccl', + 'test_dist_oneps', + 'test_dist_tree_index', + 'test_dist_fleet_ps', + 'test_dist_fleet_a_sync_optimizer_sync', + 'test_dist_fleet_decay', + 'test_auto_checkpoint2', + 'test_dist_fleet_heter_ctr', + 'test_dist_fleet_simnet', + 'test_dist_sparse_load_ps1', + 'test_dist_mnist_fleet_save', + 'test_dist_fleet_ps7', + 'test_dist_mnist_fleetapi', + 'test_dist_sparse_tensor_load_adam', + 'test_dist_fleet_ps_gpu_ctr', + 
'test_dist_mnist_ring_allreduce', + 'test_dist_op', + 'test_new_group_api', + 'test_dist_fleet_heter_base', + 'test_collective_split_col_linear', + 'test_parallel_executor_mnist', + 'test_dist_fleet_ctr2', + 'test_dist_fleet_heter_program', + 'test_dist_fleet_ctr', + 'test_collective_allreduce_api', + 'test_dataloader_unkeep_order', + 'test_dataloader_keep_order', + 'test_dist_se_resnext_sync', + 'test_hdfs2', + 'test_dist_fleet_ps6', + 'test_dist_fleet_a_sync_optimizer_auto_async', + 'test_dist_fleet_a_sync_optimizer_auto', + 'test_dist_fleet_ps9', 'test_dist_fleet_raw_program_optimizer_fuse_allreduce', - 'test_dist_fleet_ps11', 'test_dist_fleet_ps8', - 'test_dist_mnist_fp16_allreduce', 'test_dist_fleet_ps12', - 'test_collective_split_row_linear', 'test_collective_reduce_api', - 'test_multiprocess_dataloader_exception', 'test_collective_allgather_api', - 'test_dist_fleet_ps10', 'test_dist_sparse_tensor_load_rmsprop', + 'test_dist_fleet_ps11', + 'test_dist_fleet_ps8', + 'test_dist_mnist_fp16_allreduce', + 'test_dist_fleet_ps12', + 'test_collective_split_row_linear', + 'test_collective_reduce_api', + 'test_multiprocess_dataloader_exception', + 'test_collective_allgather_api', + 'test_dist_fleet_ps10', + 'test_dist_sparse_tensor_load_rmsprop', 'test_collective_split_embedding_none_divisible', - 'test_parallel_dygraph_dataparallel', 'test_auto_checkpoint3', - 'test_fleet_graph_execution_meta_optimizer', 'test_auto_checkpoint1', - 'test_dist_fleet_ps3', 'test_dist_mnist_pg', 'test_pipeline_parallel', - 'test_dist_fleet_ps5', 'test_dist_fleet_sparse_embedding_ctr', - 'test_collective_broadcast_api', 'test_fleet_checkpoint', - 'retry_allocator_test', 'test_auto_checkpoint_multiple', - 'test_dist_mnist_backward_deps', 'test_dist_mnist_multi_comm', 'test_hdfs3', - 'test_hdfs1', 'test_dist_allreduce_op', - 'test_parallel_dygraph_sparse_embedding', 'test_dist_se_resnext_dgc', - 'test_dist_sharding_save', 'test_dist_fleet_a_sync_optimizer_async', - 'test_gen_nccl_id_op', 
'test_auto_checkpoint', + 'test_parallel_dygraph_dataparallel', + 'test_auto_checkpoint3', + 'test_fleet_graph_execution_meta_optimizer', + 'test_auto_checkpoint1', + 'test_dist_fleet_ps3', + 'test_dist_mnist_pg', + 'test_pipeline_parallel', + 'test_dist_fleet_ps5', + 'test_dist_fleet_sparse_embedding_ctr', + 'test_collective_broadcast_api', + 'test_fleet_checkpoint', + 'retry_allocator_test', + 'test_auto_checkpoint_multiple', + 'test_dist_mnist_backward_deps', + 'test_dist_mnist_multi_comm', + 'test_hdfs3', + 'test_hdfs1', + 'test_dist_allreduce_op', + 'test_parallel_dygraph_sparse_embedding', + 'test_dist_se_resnext_dgc', + 'test_dist_sharding_save', + 'test_dist_fleet_a_sync_optimizer_async', + 'test_gen_nccl_id_op', + 'test_auto_checkpoint', 'test_collective_split_embedding', 'test_parallel_dygraph_sparse_embedding_over_height', - 'test_dist_sparse_tensor_load_momentum', 'test_auto_checkpoint_dist_basic', - 'test_dist_fleet_ps4', 'test_collective_alltoall_api', - 'test_dist_fleet_raw_program_optimizer', 'test_parallel_dygraph_mp_layers', - 'test_dist_fleet_geo', 'test_fleet_raw_program_meta_optimizer', - 'test_sync_batch_norm_op', 'test_dist_mnist_batch_merge', - 'test_fleet_launch_ps', 'test_dist_sparse_tensor_load_sgd', + 'test_dist_sparse_tensor_load_momentum', + 'test_auto_checkpoint_dist_basic', + 'test_dist_fleet_ps4', + 'test_collective_alltoall_api', + 'test_dist_fleet_raw_program_optimizer', + 'test_parallel_dygraph_mp_layers', + 'test_dist_fleet_geo', + 'test_fleet_raw_program_meta_optimizer', + 'test_sync_batch_norm_op', + 'test_dist_mnist_batch_merge', + 'test_fleet_launch_ps', + 'test_dist_sparse_tensor_load_sgd', 'test_dist_fleet_a_sync_optimizer_auto_geo', 'test_dist_lookup_sparse_table_fuse_ops', 'test_dist_fleet_a_sync_optimizer_geo', 'test_multiprocess_dataloader_iterable_dataset_static', 'test_dist_fleet_grad_clip', 'test_fleet_pipeline_meta_optimizer_with_recompute', - 'test_dist_sparse_load_ps0', 'test_collective_barrier_api', - 
'test_fleet_pipeline_meta_optimizer', 'test_parallel_dygraph_mnist', - 'test_dist_sparse_tensor_load_adagrad', 'test_new_group', - 'test_imperative_signal_handler', 'test_parallel_dygraph_sharding_parallel', - 'test_dist_hapi_model', 'test_dist_mnist_gradient_merge' + 'test_dist_sparse_load_ps0', + 'test_collective_barrier_api', + 'test_fleet_pipeline_meta_optimizer', + 'test_parallel_dygraph_mnist', + 'test_dist_sparse_tensor_load_adagrad', + 'test_new_group', + 'test_imperative_signal_handler', + 'test_parallel_dygraph_sharding_parallel', + 'test_dist_hapi_model', + 'test_dist_mnist_gradient_merge', + 'test_ir_pass_pipeline', + 'test_rnn_dp', + 'test_parallel_dygraph_no_sync', + 'test_parallel_dygraph_no_sync_gradient_check', + 'test_hybrid_parallel_inference_helper', + 'test_parallel_class_center_sample', + 'test_auto_parallel_data_unshard', + 'small_vector_test', + 'scope_guard_test', + 'cinn_cache_key_test', + 'test_generate_pass_cc', + 'cinn_compiled_object_test', + 'cinn_runner_test', + 'test_build_cinn_pass', + 'cost_model_test', + 'device_event_test', + 'test_fused_layernorm_residual_dropout_bias', + 'test_mkldnn_quantizer', + 'test_fused_residual_dropout_bias', + 'paddle_infer_api_errors_test', + 'test_fused_dropout_act_bias', + 'test_analyzer_lexical_gru_int8', + 'workqueue_test', + 'feed_forward_test', + 'test_analyzer_lexical_gru_int8_multi_gru', + 'test_pow2_warmup_op', + 'test_dlpack', + 'test_ops_roi_align', + 'test_auto_parallel_parallelizer', + 'test_ops_roi_pool', + 'test_backward_infer_var_data_type_shape', + 'test_auto_parallel_completion', + 'test_cuda_device_count', + 'test_cuda_device_name_capability', + 'test_auto_parallel_completion_gpt', + 'test_class_center_sample_op', + 'test_dataset_consistency_inspection', + 'test_cuda_empty_cache', + 'test_cuda_graph', + 'test_disable_signal_handler', + 'test_eig_op', + 'test_eigh_op', + 'test_determinant_op', + 'test_executor_check_fetch_list', + 'test_functional_conv1d_transpose', + 
'test_functional_conv1d', + 'test_get_device_properties', + 'test_fill_diagonal_tensor_op', + 'test_linalg_cond', + 'test_memory_analysis', + 'test_matrix_rank_op', + 'test_merged_momentum_op', + 'test_parallel_executor_run_cinn', + 'test_parallel_dygraph_dataparallel_cpuonly', + 'test_eigvals_op', + 'test_sparse_attention_op', + 'test_auto_parallel_partitioner', + 'test_signal', + 'test_auto_parallel_reshard', + 'test_auto_parallel_reshard_mppp', + 'test_auto_parallel_partitioner_gpt', + 'test_fleet_hybrid_meta_optimizer', + 'test_auto_parallel_reshard_serial', + 'test_auto_parallel_reshard_dpmppp', + 'test_clip_mkldnn_op', + 'test_elementwise_sub_mkldnn_op', + 'test_flatten_mkldnn_op', + 'test_slice_mkldnn_op', + 'test_ir_generate_pass', + 'test_ir_subgraph_python_interface', + 'test_trt_convert_concat', + 'test_trt_convert_gather_nd', + 'test_trt_convert_multihead_matmul', + 'test_trt_convert_reduce_sum', + 'save_quant2_model_lstm', + 'test_trt_convert_slice', + 'test_quant2_int8_lstm_mkldnn' ] # mem=0 but always timeout or failed : It run 15 job each time in Single cases; SECONDARY_HIGH_PARALLEL_JOB_NEW = [ - 'test_dataset_conll05', 'test_conv3d_mkldnn_op', 'test_matrix_nms_op', - 'test_data', 'test_analyzer_paddletensor_tensor', - 'test_linear_chain_crf_op', 'test_analyzer_multi_model_prediction', - 'test_default_dtype', 'device_context_test', 'test_analyzer_googlenet', - 'jit_kernel_test', 'profiler_test', 'preprocess_local_pascalvoc', - 'test_conv2d_transpose_layer', 'test_analyzer_int8_googlenet', - 'test_analyzer_seq_pool1_compare_determine', 'save_quant2_model_ernie', + 'test_dataset_conll05', + 'test_conv3d_mkldnn_op', + 'test_matrix_nms_op', + 'test_data', + 'test_analyzer_paddletensor_tensor', + 'test_linear_chain_crf_op', + 'test_analyzer_multi_model_prediction', + 'test_default_dtype', + 'device_context_test', + 'test_analyzer_googlenet', + 'jit_kernel_test', + 'profiler_test', + 'preprocess_local_pascalvoc', + 'test_conv2d_transpose_layer', + 
'test_analyzer_int8_googlenet', + 'test_analyzer_seq_pool1_compare_determine', + 'save_quant2_model_ernie', 'test_parallel_executor_seresnext_with_fuse_all_reduce_cpu', - 'test_dataset_uci_housing', 'test_parallel_executor_seresnext_base_cpu', - 'test_dataset_download', 'test_quant_int8_mobilenetv1_mkldnn', - 'test_crf_decoding_op', 'test_conv3d_transpose_layer', - 'test_quant2_int8_mobilenetv1_mkldnn', 'test_softmax_bf16_mkldnn_op', - 'test_quant2_int8_resnet50_range_mkldnn', 'test_pool2d_mkldnn_op', - 'test_flags_mkldnn_ops_on_off', 'test_c_comm_init_op', - 'test_uniform_random_bf16_op', 'test_custom_concat', - 'test_weight_quantization_mobilenetv1', 'test_retinanet_detection_output', - 'test_concat_mkldnn_op', 'test_gaussian_random_mkldnn_op', - 'test_parallel_executor_seresnext_with_reduce_cpu', 'test_dataset_imikolov', - 'test_analyzer_rnn1', 'test_conv2d_mkldnn_op', 'test_conv3d_layer', - 'test_error_clip', 'selected_rows_test', 'test_static_save_load_large', - 'test_bipartite_match_op', 'test_conv2d_layer', - 'test_analyzer_seq_pool1_fuse_statis', 'test_split_plugin', - 'test_analyzer_small_dam', 'test_analyzer_capi_exp_gpu', - 'test_quant2_int8_resnet50_channelwise_mkldnn', 'test_analyzer_bert', - 'test_directory_migration', 'test_elementwise_add_mkldnn_op', - 'test_quant_int8_googlenet_mkldnn', 'test_callback_early_stop', - 'test_quant2_int8_resnet50_mkldnn' + 'test_dataset_uci_housing', + 'test_parallel_executor_seresnext_base_cpu', + 'test_dataset_download', + 'test_quant_int8_mobilenetv1_mkldnn', + 'test_crf_decoding_op', + 'test_conv3d_transpose_layer', + 'test_quant2_int8_mobilenetv1_mkldnn', + 'test_softmax_bf16_mkldnn_op', + 'test_quant2_int8_resnet50_range_mkldnn', + 'test_pool2d_mkldnn_op', + 'test_flags_mkldnn_ops_on_off', + 'test_c_comm_init_op', + 'test_uniform_random_bf16_op', + 'test_custom_concat', + 'test_weight_quantization_mobilenetv1', + 'test_retinanet_detection_output', + 'test_concat_mkldnn_op', + 'test_gaussian_random_mkldnn_op', + 
'test_parallel_executor_seresnext_with_reduce_cpu', + 'test_dataset_imikolov', + 'test_analyzer_rnn1', + 'test_conv2d_mkldnn_op', + 'test_conv3d_layer', + 'test_error_clip', + 'selected_rows_test', + 'test_static_save_load_large', + 'test_bipartite_match_op', + 'test_conv2d_layer', + 'test_analyzer_seq_pool1_fuse_statis', + 'test_split_plugin', + 'test_analyzer_small_dam', + 'test_analyzer_capi_exp_gpu', + 'test_quant2_int8_resnet50_channelwise_mkldnn', + 'test_analyzer_bert', + 'test_directory_migration', + 'test_elementwise_add_mkldnn_op', + 'test_quant_int8_googlenet_mkldnn', + 'test_callback_early_stop', ] # mem=0 but always timeout or failed : It run 12 job each time in Single cases; @@ -317,25 +755,22 @@ THIRD_HIGH_PARALLEL_JOB_NEW = [ 'test_analyzer_bfloat16_mobilenetv1', 'test_analyzer_int8_mobilenet_ssd', 'test_dataset_cifar', 'test_dataset_imdb', 'test_dataset_movielens', 'test_datasets', 'test_allgather', 'test_c_concat', 'test_c_split', - 'test_collective_reduce', 'test_collective_sendrecv', - 'test_collective_wait', 'test_cyclic_cifar_dataset', 'test_dyn_rnn', - 'test_gru_op', 'test_multiclass_nms_op', 'test_communicator_geo', - 'test_quant_int8_mobilenetv2_mkldnn', - 'test_post_training_quantization_mnist', 'test_analyzer_seq_pool1', + 'test_collective_reduce', 'test_cyclic_cifar_dataset', 'test_dyn_rnn', + 'test_multiclass_nms_op', 'test_communicator_geo', + 'test_quant_int8_mobilenetv2_mkldnn', 'test_analyzer_seq_pool1', 'test_analyzer_transformer', 'test_analyzer_transformer_profile', 'test_analyzer_int8_mobilenetv1', 'test_analyzer_bfloat16_googlenet', 'test_analyzer_quant_performance_benchmark', 'test_dataset_wmt', 'test_allreduce', 'test_broadcast', 'test_c_identity', - 'test_collective_scatter', 'test_collective_sendrecv_api', - 'test_fleet_utils', 'test_fused_elemwise_activation_op', - 'test_group_norm_op', 'test_reducescatter_api', 'test_fleet_launch_nproc', - 'test_quant_int8_resnet50_mkldnn', 'test_quant2_int8_ernie_mkldnn', - 
'convert_model2dot_ernie' + 'test_collective_sendrecv_api', 'test_fleet_utils', + 'test_fused_elemwise_activation_op', 'test_group_norm_op', + 'test_fleet_launch_nproc', 'test_quant_int8_resnet50_mkldnn', + 'test_quant2_int8_ernie_mkldnn', 'convert_model2dot_ernie' ] # mem != 0: It run 7 job each time in Single cases; 4 job each time in Multi cases; 3 job each time in exclusive cases -TETRAD_PARALLEL_JOB_NEW = [ - 'test_meshgrid_op', 'test_gather_op', 'test_word2vec', 'test_analyzer_ner', +FOURTH_HIGH_PARALLEL_JOB_NEW = [ + 'test_meshgrid_op', 'test_word2vec', 'test_analyzer_ner', 'test_fetch_lod_tensor_array', 'test_adagrad_op_v2', 'test_conv2d_fusion_op', 'test_hapi_amp', 'test_metrics', 'test_clip_by_norm_op', 'test_lr_scheduler', 'test_generate_proposals_op', @@ -344,8 +779,8 @@ TETRAD_PARALLEL_JOB_NEW = [ 'test_memcpy_op', 'test_warpctc_op', 'test_row_conv_op', 'test_grid_sample_function', 'test_rnn_nets', 'test_pad3d_op', 'test_imperative_mnist_sorted_gradient', 'tensor_test', - 'test_elementwise_nn_grad', 'test_tensorrt_engine_op', 'test_dot_op', - 'test_real_imag_op', 'test_adam_optimizer_fp32_fp64', 'test_reduce_op', + 'test_tensorrt_engine_op', 'test_dot_op', 'test_real_imag_op', + 'test_adam_optimizer_fp32_fp64', 'test_reduce_op', 'test_density_prior_box_op', 'test_top_k_op', 'test_grid_generator', 'test_randn_op', 'test_activation_mkldnn_op', 'test_lac', 'test_pad_op', 'test_lstmp_op', 'test_loop', 'test_pylayer_op', @@ -419,10 +854,9 @@ TETRAD_PARALLEL_JOB_NEW = [ 'test_optimizer_in_control_flow', 'test_lookup_table_op', 'test_randint_op', 'test_randint_like', 'test_convert_call', 'test_sigmoid_cross_entropy_with_logits_op', 'copy_cross_scope_test', - 'test_normalization_wrapper', 'test_pretrained_model', 'test_flip', - 'test_cosine_similarity_api', 'test_cumsum_op', 'test_range', - 'test_log_loss_op', 'test_where_index', 'test_tril_triu_op', - 'test_lod_reset_op', 'test_lod_tensor', 'test_addmm_op', + 'test_normalization_wrapper', 'test_flip', 
'test_cosine_similarity_api', + 'test_cumsum_op', 'test_range', 'test_log_loss_op', 'test_where_index', + 'test_tril_triu_op', 'test_lod_reset_op', 'test_lod_tensor', 'test_addmm_op', 'test_index_select_op', 'test_nvprof', 'test_index_sample_op', 'test_unstack_op', 'test_increment', 'strided_memcpy_test', 'test_target_assign_op', 'test_trt_dynamic_shape_transformer_prune', @@ -431,10 +865,9 @@ TETRAD_PARALLEL_JOB_NEW = [ 'test_callback_reduce_lr_on_plateau', 'test_tile_op', 'test_logical', 'test_deformable_conv_op', 'test_elementwise_add_grad_grad', 'test_simple_rnn_op', 'test_bicubic_interp_op', 'test_batch_norm_op_v2', - 'test_trt_slice_plugin', 'test_custom_relu_op_jit', - 'test_math_op_patch_var_base', 'test_se_resnet', 'test_device_guard', - 'test_elementwise_div_grad_grad', 'test_minus_op', 'test_shard_index_op', - 'test_dygraph_recompute', 'test_momentum_op', 'test_trt_nearest_interp_op', + 'test_custom_relu_op_jit', 'test_math_op_patch_var_base', 'test_se_resnet', + 'test_device_guard', 'test_elementwise_div_grad_grad', 'test_minus_op', + 'test_shard_index_op', 'test_dygraph_recompute', 'test_momentum_op', 'test_modelaverage', 'test_compare_reduce_op', 'test_affine_grid_op', 'test_allclose_layer', 'test_elementwise_pow_op', 'test_trt_subgraph_pass', 'test_adaptive_avg_pool2d', 'test_functional_conv3d', @@ -455,15 +888,15 @@ TETRAD_PARALLEL_JOB_NEW = [ 'test_shuffle_channel_op', 'test_partial_concat_op', 'test_fill_zeros_like2_op', 'test_deformable_conv_v1_op', 'test_complex_grad_accumulated', 'test_sequence_mask', 'test_fill_op', - 'test_imperative_deepcf', 'test_reorder_lod_tensor', - 'test_cross_entropy_loss', 'test_multiply', 'test_partial_program', - 'test_fetch_feed', 'test_group', 'test_trt_reduce_sum_op', - 'data_type_transform_test', 'test_gru_rnn_op', 'test_cudnn_grucell', - 'test_argsort_op', 'test_batch_norm_op', 'test_inplace', - 'test_deprecated_decorator', 'test_complex_cast', 'test_diag_v2', - 'test_iou_similarity_op', 
'test_inplace_auto_generated_apis', 'test_dataset', - 'test_bilinear_api', 'test_empty_like_op', 'test_imperative_layer_children', - 'nccl_op_test', 'test_tree_conv_op', 'test_share_data_op', + 'test_imperative_deepcf', 'test_reorder_lod_tensor', 'test_multiply', + 'test_partial_program', 'test_fetch_feed', 'test_group', + 'test_trt_reduce_sum_op', 'data_type_transform_test', 'test_gru_rnn_op', + 'test_cudnn_grucell', 'test_argsort_op', 'test_batch_norm_op', + 'test_inplace', 'test_deprecated_decorator', 'test_complex_cast', + 'test_diag_v2', 'test_iou_similarity_op', + 'test_inplace_auto_generated_apis', 'test_dataset', 'test_bilinear_api', + 'test_empty_like_op', 'test_imperative_layer_children', 'nccl_op_test', + 'test_tree_conv_op', 'test_share_data_op', 'test_ir_memory_optimize_transformer', 'test_lod_append_op', 'test_math_op_patch', 'test_base_layer', 'test_dequantize_log_op', 'test_complex_matmul', 'test_prelu_op', 'test_l1_norm_op', @@ -476,9 +909,9 @@ TETRAD_PARALLEL_JOB_NEW = [ 'test_nll_loss', 'test_imperative_layers', 'test_rnn_decode_api', 'test_imperative_partitial_backward', 'test_where_op', 'test_std_layer', 'test_ir_embedding_eltwise_layernorm_fuse_pass', 'test_multihead_attention', - 'test_ir_memory_optimize_ifelse_op', 'test_grid_sampler_op', - 'test_initializer_nn', 'test_var_base', 'test_fuse_elewise_add_act_pass', - 'test_select_input_output_op', 'test_lstm_op', 'test_break_continue', + 'test_grid_sampler_op', 'test_initializer_nn', 'test_var_base', + 'test_fuse_elewise_add_act_pass', 'test_select_input_output_op', + 'test_lstm_op', 'test_break_continue', 'test_imperative_parallel_coalesce_split', 'test_expand_as_op', 'test_user_defined_quantization', 'test_tensor_to_list', 'test_limit_gpu_memory', 'test_adamax_api', @@ -517,20 +950,20 @@ TETRAD_PARALLEL_JOB_NEW = [ 'test_input_spec', 'test_adam_op', 'test_elementwise_floordiv_op', 'test_eager_deletion_gru_net', 'test_diagonal_op', 'test_imperative_static_runner_mnist', 
'test_nearest_interp_op', - 'test_conv2d_transpose_op', 'test_diag_embed', 'test_imperative_basic', - 'test_merge_selectedrows_op', 'test_feed_data_check_shape_type', - 'test_complex_trace_layer', 'test_slice_op', 'test_bmn', - 'test_nn_quant_functional_layers', 'test_broadcast_tensors_op', - 'test_selu_op', 'test_group_norm_op_v2', 'test_tensor_to_numpy', - 'test_queue', 'test_rank_loss_op', 'test_trace_op', 'test_case', - 'test_prroi_pool_op', 'test_op_name_conflict', 'test_psroi_pool_op', - 'test_set_value_op', 'test_ones_like', 'test_assign_value_op', 'test_ema', - 'test_lamb_op', 'test_dgc_momentum_op', 'test_custom_grad_input', - 'test_trunc_op', 'test_bernoulli_op', 'test_custom_relu_model', - 'test_backward', 'test_conv3d_transpose_part2_op', 'test_complex_transpose', - 'test_memory_reuse_exclude_feed_var', 'test_polygon_box_transform', - 'math_function_gpu_test', 'test_program_prune_backward', 'test_ema_fleet', - 'test_fleet_amp_init', 'test_normalize', 'test_correlation', + 'test_diag_embed', 'test_imperative_basic', 'test_merge_selectedrows_op', + 'test_feed_data_check_shape_type', 'test_complex_trace_layer', + 'test_slice_op', 'test_bmn', 'test_nn_quant_functional_layers', + 'test_broadcast_tensors_op', 'test_selu_op', 'test_group_norm_op_v2', + 'test_tensor_to_numpy', 'test_queue', 'test_rank_loss_op', 'test_trace_op', + 'test_case', 'test_prroi_pool_op', 'test_op_name_conflict', + 'test_psroi_pool_op', 'test_set_value_op', 'test_ones_like', + 'test_assign_value_op', 'test_ema', 'test_lamb_op', 'test_dgc_momentum_op', + 'test_custom_grad_input', 'test_trunc_op', 'test_bernoulli_op', + 'test_custom_relu_model', 'test_backward', 'test_conv3d_transpose_part2_op', + 'test_complex_transpose', 'test_memory_reuse_exclude_feed_var', + 'test_polygon_box_transform', 'math_function_gpu_test', + 'test_program_prune_backward', 'test_ema_fleet', 'test_fleet_amp_init', + 'test_normalize', 'test_correlation', 'test_conv_elementwise_add2_act_fuse_pass', 
'test_imperative_container_layerlist', 'test_dequantize_abs_max_op', 'test_fuse_optimizer_pass', 'test_optimizer', @@ -543,8 +976,8 @@ TETRAD_PARALLEL_JOB_NEW = [ 'test_fused_fc_elementwise_layernorm_op', 'test_sequence_enumerate_op', 'test_lgamma_op', 'test_modified_huber_loss_op', 'trt_quant_int8_test', 'test_callback_visualdl', 'test_linspace', 'test_update_loss_scaling_op', - 'test_arg_min_max_op', 'test_empty_op', 'test_bce_loss', - 'test_nn_margin_rank_loss', 'test_arg_min_max_v2_op', 'test_variance_layer', + 'test_arg_min_max_op', 'test_bce_loss', 'test_nn_margin_rank_loss', + 'test_arg_min_max_v2_op', 'test_variance_layer', 'test_quantization_scale_pass', 'test_segment_ops', 'test_layers', 'test_isfinite_op', 'test_imperative_qat_channelwise', 'test_eye_op', 'test_imperative_framework', 'test_l1_loss', 'test_ifelse', @@ -557,9 +990,8 @@ TETRAD_PARALLEL_JOB_NEW = [ 'test_conv_elementwise_add_fuse_pass', 'test_auto_growth_gpu_memory_limit', 'test_sequence_reverse', 'test_fc_op', 'test_diagflat', 'test_adamax_op', 'test_op_attr', 'paddle_infer_api_test', 'test_mixed_precision', - 'lite_mul_model_test', 'test_sort_op', 'test_scatter_op', - 'test_imperative_out_scale', 'test_vision_models', - 'test_rnn_encoder_decoder', 'test_fleet_with_asp_amp', + 'lite_mul_model_test', 'test_sort_op', 'test_imperative_out_scale', + 'test_vision_models', 'test_rnn_encoder_decoder', 'test_fleet_with_asp_amp', 'test_partial_eager_deletion_transformer', 'test_imperative_star_gan_with_gradient_penalty', 'test_stack_op', 'test_shuffle_batch_op', 'test_clip_op', 'test_py_func_op', @@ -616,14 +1048,14 @@ TETRAD_PARALLEL_JOB_NEW = [ 'trt_dynamic_shape_test', 'test_traced_layer_err_msg', 'test_conv1d_layer', 'test_asp_optimize', 'test_imperative_container_sequential', 'test_bert', 'test_transformer_api', 'test_linear_interp_v2_op', 'test_pixel_shuffle', - 'test_expand_op', 'test_save_load', 'test_dygraph_multi_forward', - 'test_dropout_op', 'test_while_loop_op', 'float16_gpu_test', 
'test_dict', - 'test_bilinear_tensor_product_op', 'test_parallel_executor_pg', 'test_assert', - 'test_smooth_l1_loss_op', 'sequence_padding_test', 'test_analyzer_ernie', - 'test_minimum_op', 'test_yolov3_loss_op', 'test_decayed_adagrad_op', - 'test_split_mkldnn_op', 'test_squeeze_op', 'test_save_inference_model', - 'test_smooth_l1_loss', 'test_bilateral_slice_op', 'test_inplace_abn_op', - 'test_fetch_unmerged', 'test_parallel_executor_feed_persistable_var', + 'test_expand_op', 'test_save_load', 'test_dropout_op', 'test_while_loop_op', + 'float16_gpu_test', 'test_dict', 'test_bilinear_tensor_product_op', + 'test_parallel_executor_pg', 'test_assert', 'test_smooth_l1_loss_op', + 'sequence_padding_test', 'test_analyzer_ernie', 'test_minimum_op', + 'test_yolov3_loss_op', 'test_decayed_adagrad_op', 'test_split_mkldnn_op', + 'test_squeeze_op', 'test_save_inference_model', 'test_smooth_l1_loss', + 'test_bilateral_slice_op', 'test_inplace_abn_op', 'test_fetch_unmerged', + 'test_parallel_executor_feed_persistable_var', 'test_parallel_executor_fetch_isolated_var', 'test_parallel_executor_inference_feed_partial_data', 'test_parallel_executor_seresnext_base_gpu', @@ -631,31 +1063,24 @@ TETRAD_PARALLEL_JOB_NEW = [ 'test_parallel_executor_seresnext_with_fuse_all_reduce_gpu', 'test_parallel_ssa_graph_inference_feed_partial_data', 'test_parallel_executor_seresnext_with_reduce_gpu', 'test_data_norm_op', - 'test_install_check', 'graph_node_test', 'trt_quant_int8_yolov3_r50_test', - 'test_trt_dynamic_shape_ernie', 'trt_mobilenet_test', + 'test_install_check', 'graph_node_test', 'trt_mobilenet_test', 'trt_cascade_rcnn_test', 'trt_resnext_test', 'test_activation_nn_grad', - 'test_trt_dynamic_shape_ernie_fp16_ser_deser', 'test_bilinear_interp_v2_op', - 'test_cross_entropy2_op', 'test_conv3d_op', 'test_layer_norm_op', - 'test_pool3d_op', 'test_static_save_load', 'test_trilinear_interp_v2_op', - 'test_trilinear_interp_op', 'test_trt_gather_nd_op', 'test_trt_gather_op', - 
'test_trt_flatten_op', 'test_trt_instance_norm_op', 'test_trt_yolo_box_op', - 'test_trt_reshape_op', 'test_trt_reduce_mean_op', - 'test_trt_dynamic_shape_ernie_ser_deser', 'test_trt_elementwise_op', - 'test_trt_affine_channel_op', 'test_trt_conv_pass', - 'test_softmax_with_cross_entropy_op', 'test_trt_matmul', - 'test_trt_fc_fuse_pass', 'test_trt_pad_op', 'test_trt_scale_op', - 'test_trt_activation_pass', 'trt_resnet50_test', + 'test_trt_dynamic_shape_ernie_fp16_ser_deser', 'test_cross_entropy2_op', + 'test_layer_norm_op', 'test_pool3d_op', 'test_static_save_load', + 'test_trt_flatten_op', 'test_trt_yolo_box_op', 'test_trt_reshape_op', + 'test_trt_elementwise_op', 'test_trt_affine_channel_op', 'test_trt_matmul', + 'test_trt_fc_fuse_pass', 'test_trt_pad_op', 'trt_resnet50_test', 'test_imperative_lod_tensor_to_selected_rows', 'test_gru_unit_op', 'test_amp_check_finite_and_scale_op', 'test_imperative_selected_rows_to_lod_tensor', 'test_imperative_save_load', 'test_add_reader_dependency', 'test_imperative_transformer_sorted_gradient', - 'test_bicubic_interp_v2_op', 'test_rank_attention_op', 'test_seq2seq', + 'test_bicubic_interp_v2_op', 'test_rank_attention_op', 'test_space_to_depth_op', 'test_image_classification', 'test_custom_relu_op_setup', 'test_sgd_op' ] # mem != 0 : It run 7 job each time in Single cases; 3 job each time in exclusive cases -TWO_PARALLEL_JOB_NEW = [ +FIFTH_PARALLEL_JOB_NEW = [ 'test_buffer_shared_memory_reuse_pass', 'test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass', 'test_parallel_executor_crf', 'test_multiprocess_reader_exception', @@ -670,6 +1095,104 @@ TWO_PARALLEL_JOB_NEW = [ 'test_imperative_se_resnext', 'test_norm_nn_grad', 'test_conv2d_api' ] +SIXTH_PARALLEL_JOB_NEW = [ + 'paddle_infer_api_copy_tensor_tester', + 'test_fill_any_op', + 'test_frame_op', + 'test_linalg_pinv_op', + 'test_gumbel_softmax_op', + 'test_matrix_power_op', + 'test_multi_dot_op', + 'test_searchsorted_op', + 'test_overlap_add_op', + 
'test_sparse_momentum_op', + 'test_solve_op', + 'test_tensor_fill_diagonal_', + 'test_tensor_fill_diagonal_tensor_', + 'test_vjp_jvp', + 'test_fft_with_static_graph', + 'test_svd_op', + 'test_hessian', + 'test_jacobian', + 'test_spectral_op', + 'test_trt_conv3d_op', + 'test_trt_conv3d_transpose_op', + 'test_trt_tuned_dynamic_shape', + 'test_trt_convert_activation', + 'test_trt_convert_affine_channel', + 'test_trt_convert_anchor_generator', + 'test_trt_fc_fuse_quant_dequant_pass', + 'test_trt_convert_batch_norm', + 'test_trt_conv_quant_dequant_pass', + 'test_trt_convert_elementwise', + 'test_trt_convert_depthwise_conv2d_transpose', + 'test_trt_convert_flatten', + 'test_trt_matmul_quant_dequant', + 'test_trt_convert_dropout', + 'test_trt_convert_conv2d_transpose', + 'test_trt_convert_group_norm', + 'test_trt_convert_layer_norm', + 'test_trt_convert_hard_swish', + 'test_trt_convert_mish', + 'test_trt_convert_gather', + 'test_trt_convert_gelu', + 'test_trt_convert_reshape', + 'test_trt_convert_conv2d_fusion', + 'test_trt_convert_conv2d', + 'test_trt_convert_instance_norm', + 'test_trt_convert_skip_layernorm', + 'test_trt_convert_scale', + 'test_trt_convert_leaky_relu', + 'test_trt_convert_softmax', + 'test_trt_convert_pad', + 'test_trt_convert_tile', + 'test_trt_convert_depthwise_conv2d', + 'test_trt_convert_stack', + 'test_trt_convert_prelu', + 'test_trt_convert_shuffle_channel', + 'test_trt_convert_yolo_box', + 'test_trt_convert_roi_align', + 'test_trt_convert_split', + 'test_trt_convert_transpose', + 'test_standalone_executor', + 'test_trt_convert_pool2d', + 'test_trt_convert_emb_eltwise_layernorm', + 'trt_quant_int8_yolov3_r50_test', + 'test_trt_dynamic_shape_ernie', + 'test_trt_reduce_mean_op', + 'test_trt_nearest_interp_op', + 'test_trt_instance_norm_op', + 'test_trt_conv_pass', + 'test_trt_scale_op', + 'test_trt_slice_plugin', + 'test_trt_gather_op', + 'test_seq2seq', + 'test_bilinear_interp_v2_op', + 'test_conv2d_transpose_op', + 'test_conv3d_op', + 
'test_cross_entropy_loss', + 'test_trilinear_interp_op', + 'test_pretrained_model', + 'test_post_training_quantization_mnist', + 'test_collective_wait', + 'test_nn_matmul_v2_grad', + 'test_quant2_int8_resnet50_mkldnn', + 'test_reducescatter_api', + 'test_collective_sendrecv', + 'test_collective_scatter', + 'test_gru_op', + 'test_softmax_with_cross_entropy_op', + 'test_elementwise_nn_grad', +] + +LOWEST_PARALLEL_JOB_NEW = [ + 'heter_server_test', + 'test_scatter_op', + 'test_trt_convert_hard_sigmoid', + 'test_gather_op', + 'test_trilinear_interp_v2_op', +] + # *=======These unittest doesn't occupy GPU memory, just run as CPU unittest=======* # # It run 16 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, # just remove it from this list. @@ -1672,62 +2195,76 @@ TWO_PARALLEL_JOB = [ def main(): - cpu_parallel_job = '^job$' - secondary_cpu_parallel_job = '^job$' - third_cpu_parallel_job = '^job$' + high_parallel_job = '^job$' + secondary_high_parallel_job = '^job$' + third_high_parallel_job = '^job$' - tetrad_parallel_job = '^job$' - two_parallel_job = '^job$' + fourth_high_parallel_job = '^job$' + fifth_high_parallel_job = '^job$' + sixth_high_parallel_job = '^job$' + lowest_high_parallel_job = '^job$' non_parallel_job = '^job$' test_cases = sys.argv[1] test_cases = test_cases.split("\n") if platform.system() == 'Windows': - cpu_parallel_job_list = CPU_PARALLEL_JOB - tetrad_parallel_job_list = TETRAD_PARALLEL_JOB - two_parallel_job_list = TWO_PARALLEL_JOB + high_parallel_job_list = CPU_PARALLEL_JOB + fourth_high_parallel_job_list = TETRAD_PARALLEL_JOB + fifth_high_parallel_job_list = TWO_PARALLEL_JOB else: - cpu_parallel_job_list = HIGH_PARALLEL_JOB_NEW - tetrad_parallel_job_list = TETRAD_PARALLEL_JOB_NEW - two_parallel_job_list = TWO_PARALLEL_JOB_NEW + high_parallel_job_list = HIGH_PARALLEL_JOB_NEW + fourth_high_parallel_job_list = FOURTH_HIGH_PARALLEL_JOB_NEW + fifth_high_parallel_job_list = FIFTH_PARALLEL_JOB_NEW - for 
unittest in cpu_parallel_job_list: + for unittest in high_parallel_job_list: if unittest in test_cases: - cpu_parallel_job = cpu_parallel_job + '|^' + unittest + '$' + high_parallel_job = high_parallel_job + '|^' + unittest + '$' test_cases.remove(unittest) if platform.system() != 'Windows': for unittest in SECONDARY_HIGH_PARALLEL_JOB_NEW: if unittest in test_cases: - secondary_cpu_parallel_job = secondary_cpu_parallel_job + '|^' + unittest + '$' + secondary_high_parallel_job = secondary_high_parallel_job + '|^' + unittest + '$' test_cases.remove(unittest) + for unittest in THIRD_HIGH_PARALLEL_JOB_NEW: if unittest in test_cases: - third_cpu_parallel_job = third_cpu_parallel_job + '|^' + unittest + '$' + third_high_parallel_job = third_high_parallel_job + '|^' + unittest + '$' + test_cases.remove(unittest) + + for unittest in SIXTH_PARALLEL_JOB_NEW: + if unittest in test_cases: + sixth_high_parallel_job = sixth_high_parallel_job + '|^' + unittest + '$' + test_cases.remove(unittest) + + for unittest in LOWEST_PARALLEL_JOB_NEW: + if unittest in test_cases: + lowest_high_parallel_job = lowest_high_parallel_job + '|^' + unittest + '$' test_cases.remove(unittest) - for unittest in tetrad_parallel_job_list: + for unittest in fourth_high_parallel_job_list: if unittest in test_cases: - tetrad_parallel_job = tetrad_parallel_job + '|^' + unittest + '$' + fourth_high_parallel_job = fourth_high_parallel_job + '|^' + unittest + '$' test_cases.remove(unittest) - for unittest in two_parallel_job_list: + for unittest in fifth_high_parallel_job_list: if unittest in test_cases: - two_parallel_job = two_parallel_job + '|^' + unittest + '$' + fifth_high_parallel_job = fifth_high_parallel_job + '|^' + unittest + '$' test_cases.remove(unittest) for unittest in test_cases: non_parallel_job = non_parallel_job + '|^' + unittest + '$' if platform.system() == 'Windows': - print("{};{};{};{}".format(cpu_parallel_job, tetrad_parallel_job, - two_parallel_job, non_parallel_job)) + 
print("{};{};{};{}".format(high_parallel_job, fourth_high_parallel_job, + fifth_high_parallel_job, non_parallel_job)) else: - print("{};{};{};{};{};{}".format( - cpu_parallel_job, secondary_cpu_parallel_job, - third_cpu_parallel_job, tetrad_parallel_job, two_parallel_job, - non_parallel_job)) + print("{};{};{};{};{};{};{};{}".format( + high_parallel_job, secondary_high_parallel_job, + third_high_parallel_job, fourth_high_parallel_job, + fifth_high_parallel_job, sixth_high_parallel_job, + lowest_high_parallel_job, non_parallel_job)) if __name__ == '__main__': -- GitLab