未验证 提交 9eb4d89b 编写于 作者: R Roc 提交者: GitHub

move collective tests into a collective directory (#45223)

* add simple reformatted ci files

* update

* add readme for new unit tests

* add readme for new unit tests

* add readme for new unit tests

* reset mlu

* update for samples

* add base api

* reset some dist unit tests

* add warning in generated cmakelists file

* update readme for new dist unit tests

* add all collective tests

* remain base file and launcher file

* Update README.md

* Update README.md

* fix env PYTHONPATH

* Update gen_ut_cmakelists.py

* add all collective tests

* add docs for gen_ut_cmakelists.py

* prettify code

* commont name == "name"

* update for comments

* update function's help

* update for run type

* update readme

* add all collective tests

* add all collective tests

* mv  collective test files

* update for all collective tests

* update

* update

* update

* update for all tests

* update for checking name

* Update Cmakelists.txt

* update testlist.csv

* remain test_parallel_dygraph_dataparallel in unittests

* set broadcast op all platforms

* update

* remain test_broadcast_tensors_op

* fix

* rm some collective files

* update more collective tests

* update

* update

* update
gen_ut_supports recursion

* update

* update

* update

* update

* fix nccl version

* update

* update

* update

* update

* fix a bug and try to pass

* update

* add csv

* update for timeout

* remove tcp store

* fix

* fix

* update

* update

* update for more dist tests

* move multi node tests

* update

* update

* update

* fix for auto parallel

* update

* update path in python file

* update

* reset some test in unittests

* fix

* update readme

* fix

* update

* fix port
上级 2c89bccb
...@@ -364,6 +364,18 @@ if(WIN32) ...@@ -364,6 +364,18 @@ if(WIN32)
endif() endif()
endif() endif()
if(NOT WITH_TESTING AND WITH_MULTINODE_TESTING)
message(
WARNING
"Disable WITH_MULTINODE_TESTING when compiling without TESTING. Force WITH_MULTINODE_TESTING=OFF."
)
set(WITH_MULTINODE_TESTING
OFF
CACHE STRING
"Disable WITH_MULTINODE_TESTING when compiling without TESTING"
FORCE)
endif()
if(NOT WITH_GPU AND WITH_NCCL) if(NOT WITH_GPU AND WITH_NCCL)
message( message(
WARNING "Disable NCCL when compiling without GPU. Force WITH_NCCL=OFF.") WARNING "Disable NCCL when compiling without GPU. Force WITH_NCCL=OFF.")
......
...@@ -7,20 +7,11 @@ set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0 FLAGS_fast_eager_deletion_mode=1 ...@@ -7,20 +7,11 @@ set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0 FLAGS_fast_eager_deletion_mode=1
FLAGS_memory_fraction_of_eager_deletion=1.0) FLAGS_memory_fraction_of_eager_deletion=1.0)
set(dist_ENVS http_proxy="" https_proxy="") set(dist_ENVS http_proxy="" https_proxy="")
file(
GLOB MULTINODE_DIST_TEST_OPS
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"test_multinode_*.py")
string(REPLACE ".py" "" MULTINODE_DIST_TEST_OPS "${MULTINODE_DIST_TEST_OPS}")
file( file(
GLOB DIST_TEST_OPS GLOB DIST_TEST_OPS
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"test_dist_*.py") "test_dist_*.py")
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_op") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_op")
if((NOT WITH_NCCL) AND (NOT WITH_RCCL))
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl")
endif()
string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}") string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}")
...@@ -32,77 +23,22 @@ if((NOT WITH_GPU) ...@@ -32,77 +23,22 @@ if((NOT WITH_GPU)
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_ps_gpu_ctr") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_ps_gpu_ctr")
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_batch_merge") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_batch_merge")
endif() endif()
list(APPEND DIST_TEST_OPS test_parallel_dygraph_dataparallel)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_mnist)
list(APPEND DIST_TEST_OPS test_pipeline)
list(APPEND DIST_TEST_OPS test_ir_pass_pipeline)
list(APPEND DIST_TEST_OPS test_static_model_parallel)
list(APPEND DIST_TEST_OPS test_static_model_parallel_fused_feedforward) list(APPEND DIST_TEST_OPS test_static_model_parallel_fused_feedforward)
list(APPEND DIST_TEST_OPS test_static_model_parallel_fused_attention) list(APPEND DIST_TEST_OPS test_static_model_parallel_fused_attention)
list(APPEND DIST_TEST_OPS test_static_model_parallel_fused_multi_transformer) list(APPEND DIST_TEST_OPS test_static_model_parallel_fused_multi_transformer)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_se_resnext)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_sparse_embedding)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_sparse_embedding_over_height)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_transformer)
if(WITH_GPU
OR WITH_XPU
OR WITH_ASCEND
OR WITH_ASCEND_CL)
list(APPEND DIST_TEST_OPS test_fleet_graph_execution_meta_optimizer)
list(APPEND DIST_TEST_OPS test_fleet_pipeline_meta_optimizer)
list(APPEND DIST_TEST_OPS test_fleet_pipeline_meta_optimizer_with_recompute)
list(APPEND DIST_TEST_OPS test_fleet_raw_program_meta_optimizer)
list(APPEND DIST_TEST_OPS test_gen_nccl_id_op)
list(APPEND DIST_TEST_OPS test_rnn_dp)
endif()
list(APPEND DIST_TEST_OPS test_parallel_dygraph_unused_variables)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_control_flow)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_no_sync)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_no_sync_gradient_check)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_dataparallel)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_pipeline_parallel)
list(APPEND DIST_TEST_OPS list(APPEND DIST_TEST_OPS
test_parallel_dygraph_pipeline_parallel_with_virtual_stage) test_parallel_dygraph_pipeline_parallel_with_virtual_stage)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_tensor_parallel)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_sharding_parallel)
list(APPEND DIST_TEST_OPS test_dygraph_sharding_optimizer_stage2)
list(APPEND DIST_TEST_OPS test_dygraph_sharding_stage2)
list(APPEND DIST_TEST_OPS test_dygraph_sharding_stage3)
list(APPEND DIST_TEST_OPS test_dygraph_sharding_stage3_for_eager)
list(APPEND DIST_TEST_OPS test_dygraph_group_sharded_api)
list(APPEND DIST_TEST_OPS test_dygraph_group_sharded_api_for_eager)
list(APPEND DIST_TEST_OPS test_auto_parallel_parallelizer)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_mp_layers)
list(APPEND DIST_TEST_OPS test_hybrid_parallel_inference_helper)
list(APPEND DIST_TEST_OPS test_parallel_class_center_sample)
list(APPEND DIST_TEST_OPS test_parallel_margin_cross_entropy)
list(APPEND DIST_TEST_OPS test_auto_parallel_data_unshard) list(APPEND DIST_TEST_OPS test_auto_parallel_data_unshard)
list(APPEND DIST_TEST_OPS test_auto_parallel_save_load) list(APPEND DIST_TEST_OPS test_auto_parallel_save_load)
list(APPEND DIST_TEST_OPS test_auto_parallel_autoconvert) list(APPEND DIST_TEST_OPS test_auto_parallel_autoconvert)
list(APPEND DIST_TEST_OPS test_collective_process_group)
list(APPEND DIST_TEST_OPS test_collective_alltoall_single)
list(APPEND DIST_TEST_OPS test_eager_dist_api)
list(APPEND DIST_TEST_OPS test_collective_batch_isend_irecv)
list(APPEND DIST_TEST_OPS test_collective_reduce_scatter)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_qat)
set(MIXED_DIST_TEST_OPS ${DIST_TEST_OPS}) set(MIXED_DIST_TEST_OPS ${DIST_TEST_OPS})
#remove distribute unittests. #remove distribute unittests.
foreach(TEST_OP ${MULTINODE_DIST_TEST_OPS})
list(APPEND MIXED_DIST_TEST_OPS ${TEST_OP})
endforeach()
list(APPEND MIXED_DIST_TEST_OPS test_dgc_op)
list(APPEND MIXED_DIST_TEST_OPS test_dgc_momentum_op)
list(APPEND MIXED_DIST_TEST_OPS test_dgc_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_simple_dist_transpiler) list(APPEND MIXED_DIST_TEST_OPS test_simple_dist_transpiler)
list(APPEND MIXED_DIST_TEST_OPS test_recv_save_op)
list(APPEND MIXED_DIST_TEST_OPS test_c_comm_init_op)
list(APPEND MIXED_DIST_TEST_OPS test_communicator_async) list(APPEND MIXED_DIST_TEST_OPS test_communicator_async)
list(APPEND MIXED_DIST_TEST_OPS test_communicator_ps_gpu) list(APPEND MIXED_DIST_TEST_OPS test_communicator_ps_gpu)
list(APPEND MIXED_DIST_TEST_OPS test_communicator_geo) list(APPEND MIXED_DIST_TEST_OPS test_communicator_geo)
list(APPEND MIXED_DIST_TEST_OPS test_communicator_half_async)
list(APPEND MIXED_DIST_TEST_OPS test_communicator_sync)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_ps) list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_ps)
list(APPEND MIXED_DIST_TEST_OPS test_launch_coverage) list(APPEND MIXED_DIST_TEST_OPS test_launch_coverage)
list(APPEND MIXED_DIST_TEST_OPS test_fleetrun) list(APPEND MIXED_DIST_TEST_OPS test_fleetrun)
...@@ -113,32 +49,11 @@ list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_ascend) ...@@ -113,32 +49,11 @@ list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_ascend)
list(APPEND MIXED_DIST_TEST_OPS test_ascend_group) list(APPEND MIXED_DIST_TEST_OPS test_ascend_group)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_nproc) list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_nproc)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_api_input) list(APPEND MIXED_DIST_TEST_OPS test_fleet_api_input)
list(APPEND MIXED_DIST_TEST_OPS test_collective_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_base) list(APPEND MIXED_DIST_TEST_OPS test_fleet_base)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_2) list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_2)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_3) list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_3)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_recompute_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_pipeline_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS
test_fleet_pipeline_meta_optimizer_with_recompute)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_raw_program_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_rnn_dp)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_amp_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_amp_init)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_gradient_merge_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_sharding_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_hybrid_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_localsgd_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_lars_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_lamb_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_dgc_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_fp16_allreduce_meta_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_private_function)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_graph_executor)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_meta_optimizer_base)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_distributed_strategy)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_auto) list(APPEND MIXED_DIST_TEST_OPS test_fleet_auto)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_static_mp_layers)
list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_partitioner) list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_partitioner)
list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_partitioner_gpt) list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_partitioner_gpt)
list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_searcher) list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_searcher)
...@@ -148,7 +63,6 @@ list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_reshard_serial) ...@@ -148,7 +63,6 @@ list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_reshard_serial)
list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_reshard_mppp) list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_reshard_mppp)
list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_reshard_dpmppp) list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_reshard_dpmppp)
list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_cost_model) list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_cost_model)
list(APPEND MIXED_DIST_TEST_OPS test_tcp_store)
list(APPEND MIXED_DIST_TEST_OPS test_dygraph_hybrid_dp) list(APPEND MIXED_DIST_TEST_OPS test_dygraph_hybrid_dp)
foreach(TEST_OP ${MIXED_DIST_TEST_OPS}) foreach(TEST_OP ${MIXED_DIST_TEST_OPS})
list(REMOVE_ITEM TEST_OPS ${TEST_OP}) list(REMOVE_ITEM TEST_OPS ${TEST_OP})
...@@ -174,30 +88,8 @@ list(REMOVE_ITEM TEST_OPS test_fuse_gemm_epilogue_pass) ...@@ -174,30 +88,8 @@ list(REMOVE_ITEM TEST_OPS test_fuse_gemm_epilogue_pass)
if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32) if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32)
list(REMOVE_ITEM TEST_OPS test_c_comm_init_all_op) list(REMOVE_ITEM TEST_OPS test_c_comm_init_all_op)
list(REMOVE_ITEM TEST_OPS test_allgather)
list(REMOVE_ITEM TEST_OPS test_c_embedding_op) list(REMOVE_ITEM TEST_OPS test_c_embedding_op)
list(REMOVE_ITEM TEST_OPS test_collective_reduce)
list(REMOVE_ITEM TEST_OPS test_pipeline_parallel) list(REMOVE_ITEM TEST_OPS test_pipeline_parallel)
list(REMOVE_ITEM TEST_OPS test_collective_scatter)
list(REMOVE_ITEM TEST_OPS test_collective_sendrecv)
list(REMOVE_ITEM TEST_OPS test_reducescatter)
list(REMOVE_ITEM TEST_OPS test_reducescatter_api)
list(REMOVE_ITEM TEST_OPS test_collective_split_embedding_none_divisible)
list(REMOVE_ITEM TEST_OPS test_collective_split_row_linear)
list(REMOVE_ITEM TEST_OPS test_collective_split_col_linear)
list(REMOVE_ITEM TEST_OPS test_collective_reduce_api)
list(REMOVE_ITEM TEST_OPS test_collective_scatter_api)
list(REMOVE_ITEM TEST_OPS test_collective_barrier_api)
list(REMOVE_ITEM TEST_OPS test_collective_allreduce_api)
list(REMOVE_ITEM TEST_OPS test_new_group_api)
list(REMOVE_ITEM TEST_OPS test_collective_broadcast_api)
list(REMOVE_ITEM TEST_OPS test_collective_allgather_api)
list(REMOVE_ITEM TEST_OPS test_collective_allgather_object_api)
list(REMOVE_ITEM TEST_OPS test_collective_alltoall_api)
list(REMOVE_ITEM TEST_OPS test_collective_global_gather)
list(REMOVE_ITEM TEST_OPS test_collective_global_scatter)
list(REMOVE_ITEM TEST_OPS test_collective_sendrecv_api)
list(REMOVE_ITEM TEST_OPS test_collective_wait)
list(REMOVE_ITEM TEST_OPS test_memcpy_op) list(REMOVE_ITEM TEST_OPS test_memcpy_op)
list(REMOVE_ITEM TEST_OPS test_raw_program_optimizer) list(REMOVE_ITEM TEST_OPS test_raw_program_optimizer)
list(REMOVE_ITEM TEST_OPS test_fleet_gradient_scale) list(REMOVE_ITEM TEST_OPS test_fleet_gradient_scale)
...@@ -212,7 +104,6 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32) ...@@ -212,7 +104,6 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32)
list(REMOVE_ITEM TEST_OPS test_fleet_exe_dist_model_tensor) list(REMOVE_ITEM TEST_OPS test_fleet_exe_dist_model_tensor)
endif() endif()
# Temporally disable test_deprecated_decorator
list(REMOVE_ITEM TEST_OPS test_deprecated_decorator) list(REMOVE_ITEM TEST_OPS test_deprecated_decorator)
if(WIN32) if(WIN32)
...@@ -239,19 +130,15 @@ endif() ...@@ -239,19 +130,15 @@ endif()
if(NOT WITH_DISTRIBUTE OR WIN32) if(NOT WITH_DISTRIBUTE OR WIN32)
# DISTRIBUTE related # DISTRIBUTE related
list(REMOVE_ITEM TEST_OPS test_avoid_twice_initialization) list(REMOVE_ITEM TEST_OPS test_avoid_twice_initialization)
list(REMOVE_ITEM TEST_OPS test_distributed_strategy)
list(REMOVE_ITEM TEST_OPS test_fleet_metric) list(REMOVE_ITEM TEST_OPS test_fleet_metric)
list(REMOVE_ITEM TEST_OPS test_fleet_ps) list(REMOVE_ITEM TEST_OPS test_fleet_ps)
list(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_2) list(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_2)
list(REMOVE_ITEM TEST_OPS test_fleet_utils)
list(REMOVE_ITEM TEST_OPS test_collective_cpu_barrier_with_gloo)
list(REMOVE_ITEM TEST_OPS test_delete_c_identity_op_pass) list(REMOVE_ITEM TEST_OPS test_delete_c_identity_op_pass)
# TODO: Fix these unittests failed on Windows # TODO: Fix these unittests failed on Windows
list(REMOVE_ITEM TEST_OPS test_fake_init_op) list(REMOVE_ITEM TEST_OPS test_fake_init_op)
endif() endif()
if(NOT WITH_DISTRIBUTE) if(NOT WITH_DISTRIBUTE)
list(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_new)
list(REMOVE_ITEM TEST_OPS test_desc_clone_dist) list(REMOVE_ITEM TEST_OPS test_desc_clone_dist)
endif() endif()
...@@ -280,8 +167,8 @@ endif() ...@@ -280,8 +167,8 @@ endif()
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_hybrid_parallel) list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_hybrid_parallel)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_transformer_gloo list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_transformer_gloo)
)# NOTE: @xiongkun03, cpu is too slow, fix it in next PR # NOTE: @xiongkun03, cpu is too slow, fix it in next PR
if(NOT WITH_GLOO) if(NOT WITH_GLOO)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_dataparallel_cpuonly) list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_dataparallel_cpuonly)
...@@ -296,44 +183,15 @@ endif() ...@@ -296,44 +183,15 @@ endif()
if((NOT WITH_GPU) AND (NOT WITH_ROCM)) if((NOT WITH_GPU) AND (NOT WITH_ROCM))
list(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) list(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
list(REMOVE_ITEM TEST_OPS test_rank_attention_op list(REMOVE_ITEM TEST_OPS test_rank_attention_op)
)# TODO(shenliang03): rank_attention_op support CPU device in future # TODO(shenliang03): rank_attention_op support CPU device in future
list(REMOVE_ITEM TEST_OPS test_batch_fc_op list(REMOVE_ITEM TEST_OPS test_batch_fc_op)
)# TODO(shenliang03): batch_fc_op support CPU device in future # TODO(shenliang03): batch_fc_op support CPU device in future
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_mnist # TODO(Yancey1989): parallel dygraph support CPU device in future
)# TODO(Yancey1989): parallel dygraph support CPU device in future
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_unused_variables)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_se_resnext)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding_over_height)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_transformer)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sync_batch_norm)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_control_flow)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_no_sync)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_no_sync_gradient_check)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_dataparallel) list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_dataparallel)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_pipeline_parallel)
list(REMOVE_ITEM TEST_OPS list(REMOVE_ITEM TEST_OPS
test_parallel_dygraph_pipeline_parallel_with_virtual_stage) test_parallel_dygraph_pipeline_parallel_with_virtual_stage)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_tensor_parallel)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sharding_parallel)
list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_optimizer_stage2)
list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_stage2)
list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_stage3)
list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_stage3_for_eager)
list(REMOVE_ITEM TEST_OPS test_dygraph_group_sharded_api)
list(REMOVE_ITEM TEST_OPS test_dygraph_group_sharded_api_for_eager)
list(REMOVE_ITEM TEST_OPS test_auto_parallel_parallelizer)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_mp_layers)
list(REMOVE_ITEM TEST_OPS test_imperative_auto_mixed_precision)
list(REMOVE_ITEM TEST_OPS test_imperative_auto_mixed_precision_for_eager)
list(REMOVE_ITEM TEST_OPS test_mixed_precision)
list(REMOVE_ITEM TEST_OPS test_fleet_base_single) list(REMOVE_ITEM TEST_OPS test_fleet_base_single)
list(REMOVE_ITEM TEST_OPS test_dygraph_recompute)
list(REMOVE_ITEM TEST_OPS test_dygraph_recompute_for_eager)
list(REMOVE_ITEM TEST_OPS test_hybrid_parallel_inference_helper)
list(REMOVE_ITEM TEST_OPS test_parallel_class_center_sample)
list(REMOVE_ITEM TEST_OPS test_parallel_margin_cross_entropy)
list(REMOVE_ITEM TEST_OPS test_auto_parallel_partitioner) list(REMOVE_ITEM TEST_OPS test_auto_parallel_partitioner)
list(REMOVE_ITEM TEST_OPS test_auto_parallel_partitioner_gpt) list(REMOVE_ITEM TEST_OPS test_auto_parallel_partitioner_gpt)
list(REMOVE_ITEM TEST_OPS test_auto_parallel_searcher) list(REMOVE_ITEM TEST_OPS test_auto_parallel_searcher)
...@@ -346,12 +204,6 @@ if((NOT WITH_GPU) AND (NOT WITH_ROCM)) ...@@ -346,12 +204,6 @@ if((NOT WITH_GPU) AND (NOT WITH_ROCM))
list(REMOVE_ITEM TEST_OPS test_auto_parallel_data_unshard) list(REMOVE_ITEM TEST_OPS test_auto_parallel_data_unshard)
list(REMOVE_ITEM TEST_OPS test_auto_parallel_save_load) list(REMOVE_ITEM TEST_OPS test_auto_parallel_save_load)
list(REMOVE_ITEM TEST_OPS test_auto_parallel_autoconvert) list(REMOVE_ITEM TEST_OPS test_auto_parallel_autoconvert)
list(REMOVE_ITEM TEST_OPS test_collective_process_group)
list(REMOVE_ITEM TEST_OPS test_collective_alltoall_single)
list(REMOVE_ITEM TEST_OPS test_eager_dist_api)
list(REMOVE_ITEM TEST_OPS test_collective_batch_isend_irecv)
list(REMOVE_ITEM TEST_OPS test_collective_reduce_scatter)
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_qat)
elseif(WITH_GPU) elseif(WITH_GPU)
if(${CUDNN_VERSION} VERSION_LESS 7100) if(${CUDNN_VERSION} VERSION_LESS 7100)
...@@ -359,35 +211,28 @@ elseif(WITH_GPU) ...@@ -359,35 +211,28 @@ elseif(WITH_GPU)
endif() endif()
endif() endif()
if(WITH_NCCL)
if(${NCCL_VERSION} VERSION_LESS 2212)
list(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_sparse_embedding)
list(REMOVE_ITEM DIST_TEST_OPS
test_parallel_dygraph_sparse_embedding_over_height)
list(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_transformer)
endif()
endif()
if((NOT WITH_NCCL) AND (NOT WITH_RCCL)) if((NOT WITH_NCCL) AND (NOT WITH_RCCL))
list(REMOVE_ITEM TEST_OPS test_imperative_group) list(REMOVE_ITEM TEST_OPS test_imperative_group)
list(REMOVE_ITEM TEST_OPS test_new_group_api)
endif() endif()
if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32) if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32)
list(REMOVE_ITEM TEST_OPS test_fused_gate_attention_op) list(REMOVE_ITEM TEST_OPS test_fused_gate_attention_op)
list(REMOVE_ITEM TEST_OPS test_boxps) list(REMOVE_ITEM TEST_OPS test_boxps)
list(REMOVE_ITEM TEST_OPS test_allgather)
list(REMOVE_ITEM TEST_OPS test_reducescatter)
list(REMOVE_ITEM TEST_OPS test_reducescatter_api)
endif() endif()
list(REMOVE_ITEM TEST_OPS test_seq_concat_op list(REMOVE_ITEM TEST_OPS test_seq_concat_op)
)# FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290 # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290
list(REMOVE_ITEM TEST_OPS test_lstm_unit_op list(REMOVE_ITEM TEST_OPS test_lstm_unit_op)
)# # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5185 # # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5185
list(REMOVE_ITEM TEST_OPS test_cond_op) list(REMOVE_ITEM TEST_OPS test_cond_op)
# FIXME(qijun): https://github.com/PaddlePaddle/Paddle/issues/5101#issuecomment-339814957 # FIXME(qijun): https://github.com/PaddlePaddle/Paddle/issues/5101#issuecomment-339814957
list(REMOVE_ITEM TEST_OPS op_test) # op_test is a helper python file, not a test list(REMOVE_ITEM TEST_OPS op_test) # op_test is a helper python file, not a test
list(REMOVE_ITEM TEST_OPS decorator_helper list(REMOVE_ITEM TEST_OPS decorator_helper)
)# decorator_helper is a helper python file, not a test # decorator_helper is a helper python file, not a test
if(APPLE) if(APPLE)
if(NOT WITH_DISTRIBUTE) if(NOT WITH_DISTRIBUTE)
...@@ -623,8 +468,6 @@ list(REMOVE_ITEM TEST_OPS test_fuse_bn_act_pass) ...@@ -623,8 +468,6 @@ list(REMOVE_ITEM TEST_OPS test_fuse_bn_act_pass)
list(REMOVE_ITEM TEST_OPS test_fuse_bn_add_act_pass) list(REMOVE_ITEM TEST_OPS test_fuse_bn_add_act_pass)
list(REMOVE_ITEM TEST_OPS test_imperative_static_runner_mnist) list(REMOVE_ITEM TEST_OPS test_imperative_static_runner_mnist)
list(REMOVE_ITEM TEST_OPS test_imperative_static_runner_while) list(REMOVE_ITEM TEST_OPS test_imperative_static_runner_while)
# disable test_cumsum_op temporaily
# list(REMOVE_ITEM TEST_OPS test_cumsum_op)
# disable this unittest temporarily # disable this unittest temporarily
list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exception) list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exception)
...@@ -641,7 +484,6 @@ if(APPLE OR WIN32) ...@@ -641,7 +484,6 @@ if(APPLE OR WIN32)
list(REMOVE_ITEM TEST_OPS test_dataset) list(REMOVE_ITEM TEST_OPS test_dataset)
list(REMOVE_ITEM TEST_OPS test_dataset_dataloader) list(REMOVE_ITEM TEST_OPS test_dataset_dataloader)
list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_base) list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_base)
# list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exception)
list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_process) list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_process)
list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_fds_clear) list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_fds_clear)
list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exit_func) list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exit_func)
...@@ -700,7 +542,6 @@ if((NOT WITH_GPU) ...@@ -700,7 +542,6 @@ if((NOT WITH_GPU)
AND (NOT WITH_XPU) AND (NOT WITH_XPU)
AND NOT (WITH_ASCEND OR WITH_ASCEND_CL)) AND NOT (WITH_ASCEND OR WITH_ASCEND_CL))
list(REMOVE_ITEM TEST_OPS "test_fleet_graph_execution_meta_optimizer") list(REMOVE_ITEM TEST_OPS "test_fleet_graph_execution_meta_optimizer")
list(REMOVE_ITEM TEST_OPS "test_gen_nccl_id_op")
list(REMOVE_ITEM TEST_OPS "test_dist_fleet_grad_clip") list(REMOVE_ITEM TEST_OPS "test_dist_fleet_grad_clip")
list(REMOVE_ITEM TEST_OPS "test_dist_fleet_heter_ctr") list(REMOVE_ITEM TEST_OPS "test_dist_fleet_heter_ctr")
list(REMOVE_ITEM TEST_OPS "test_dist_fleet_ps_gpu_ctr") list(REMOVE_ITEM TEST_OPS "test_dist_fleet_ps_gpu_ctr")
...@@ -814,67 +655,16 @@ if(WITH_DISTRIBUTE) ...@@ -814,67 +655,16 @@ if(WITH_DISTRIBUTE)
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_ps12") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_ps12")
endif() endif()
py_test_modules(test_recv_save_op MODULES test_recv_save_op ENVS ${dist_ENVS})
py_test_modules(test_communicator_async MODULES test_communicator_async ENVS py_test_modules(test_communicator_async MODULES test_communicator_async ENVS
${dist_ENVS}) ${dist_ENVS})
# py_test_modules(test_communicator_ps_gpu MODULES test_communicator_ps_gpu
# ENVS ${dist_ENVS})
py_test_modules(test_communicator_geo MODULES test_communicator_geo ENVS py_test_modules(test_communicator_geo MODULES test_communicator_geo ENVS
${dist_ENVS}) ${dist_ENVS})
py_test_modules(
test_communicator_half_async
MODULES
test_communicator_half_async
ENVS
${dist_ENVS}
FLAGS_communicator_send_queue_size=1
FLAGS_communicator_max_merge_var_num=1)
py_test_modules(
test_communicator_sync
MODULES
test_communicator_sync
ENVS
${dist_ENVS}
FLAGS_communicator_send_queue_size=1
FLAGS_communicator_max_merge_var_num=1)
py_test_modules(test_collective_optimizer MODULES test_collective_optimizer)
if(NOT APPLE) if(NOT APPLE)
py_test_modules(test_fleet_base MODULES test_fleet_base ENVS ${dist_ENVS}) py_test_modules(test_fleet_base MODULES test_fleet_base ENVS ${dist_ENVS})
py_test_modules(test_fleet_base_2 MODULES test_fleet_base_2 ENVS py_test_modules(test_fleet_base_2 MODULES test_fleet_base_2 ENVS
${dist_ENVS}) ${dist_ENVS})
py_test_modules(test_fleet_base_3 MODULES test_fleet_base_3 ENVS py_test_modules(test_fleet_base_3 MODULES test_fleet_base_3 ENVS
${dist_ENVS}) ${dist_ENVS})
py_test_modules(test_fleet_amp_init MODULES test_fleet_amp_init ENVS
${dist_ENVS})
py_test_modules(test_fleet_fp16_allreduce_meta_optimizer MODULES
test_fleet_fp16_allreduce_meta_optimizer ENVS ${dist_ENVS})
py_test_modules(test_fleet_private_function MODULES
test_fleet_private_function ENVS ${dist_ENVS})
py_test_modules(test_fleet_meta_optimizer_base MODULES
test_fleet_meta_optimizer_base ENVS ${dist_ENVS})
py_test_modules(test_fleet_distributed_strategy MODULES
test_fleet_distributed_strategy)
py_test_modules(test_fleet_static_mp_layers MODULES
test_fleet_static_mp_layers)
#py_test_modules(test_fleet_auto MODULES test_fleet_auto ENVS ${dist_ENVS})
if(WITH_GPU
OR WITH_XPU
OR WITH_ASCEND
OR WITH_ASCEND_CL)
py_test_modules(test_fleet_amp_meta_optimizer MODULES
test_fleet_amp_meta_optimizer ENVS ${dist_ENVS})
py_test_modules(
test_fleet_gradient_merge_meta_optimizer MODULES
test_fleet_gradient_merge_meta_optimizer ENVS ${dist_ENVS})
py_test_modules(test_fleet_graph_executor MODULES
test_fleet_graph_executor ENVS ${dist_ENVS})
py_test_modules(test_fleet_hybrid_meta_optimizer MODULES
test_fleet_hybrid_meta_optimizer ENVS ${dist_ENVS})
py_test_modules(test_fleet_recompute_meta_optimizer MODULES
test_fleet_recompute_meta_optimizer ENVS ${dist_ENVS})
py_test_modules(test_fleet_sharding_meta_optimizer MODULES
test_fleet_sharding_meta_optimizer ENVS ${dist_ENVS})
endif()
if(NOT WIN32) if(NOT WIN32)
py_test_modules(test_auto_parallel_partitioner MODULES py_test_modules(test_auto_parallel_partitioner MODULES
test_auto_parallel_partitioner ENVS ${dist_ENVS}) test_auto_parallel_partitioner ENVS ${dist_ENVS})
...@@ -895,52 +685,16 @@ if(WITH_DISTRIBUTE) ...@@ -895,52 +685,16 @@ if(WITH_DISTRIBUTE)
py_test_modules(test_auto_parallel_cost_model MODULES py_test_modules(test_auto_parallel_cost_model MODULES
test_auto_parallel_cost_model ENVS ${dist_ENVS}) test_auto_parallel_cost_model ENVS ${dist_ENVS})
if(WITH_GPU
OR WITH_XPU
OR WITH_ASCEND
OR WITH_ASCEND_CL)
py_test_modules(test_fleet_lamb_meta_optimizer MODULES
test_fleet_lamb_meta_optimizer ENVS ${dist_ENVS})
py_test_modules(test_fleet_lars_meta_optimizer MODULES
test_fleet_lars_meta_optimizer ENVS ${dist_ENVS})
py_test_modules(test_fleet_localsgd_meta_optimizer MODULES
test_fleet_localsgd_meta_optimizer ENVS ${dist_ENVS})
endif()
endif() endif()
endif() endif()
if(WITH_DGC) if(NOT WITH_DGC)
# if with dgc, test all dgc tests.
# NOTE. dist dgc tests is already in DIST_TEST_OPS
py_test_modules(test_dgc_op MODULES test_dgc_op)
py_test_modules(test_dgc_momentum_op MODULES test_dgc_momentum_op)
py_test_modules(test_dgc_optimizer MODULES test_dgc_optimizer)
py_test_modules(test_fleet_dgc_meta_optimizer MODULES
test_fleet_dgc_meta_optimizer)
else()
# if not with dgc, must close all dgc tests # if not with dgc, must close all dgc tests
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl")
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_se_resnext_dgc") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_se_resnext_dgc")
endif() endif()
# port range (20000, 23000) is reserved for dist-ops
set(dist_ut_port 20001)
if(NOT WIN32)
bash_test_modules(
test_tcp_store
START_BASH
dist_test.sh
LABELS
"RUN_TYPE=EXCLUSIVE"
ENVS
"PADDLE_DIST_UT_PORT=${dist_ut_port}")
math(EXPR dist_ut_port "${dist_ut_port}+1")
endif()
if(NOT APPLE) if(NOT APPLE)
if(WITH_GPU OR WITH_ROCM) if(WITH_GPU OR WITH_ROCM)
bash_test_modules(test_c_comm_init_op START_BASH test_c_comm_init_op.sh
ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR})
py_test_modules(test_launch_coverage MODULES test_launch_coverage) py_test_modules(test_launch_coverage MODULES test_launch_coverage)
endif() endif()
...@@ -971,22 +725,8 @@ if(WITH_DISTRIBUTE) ...@@ -971,22 +725,8 @@ if(WITH_DISTRIBUTE)
PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR})
endif() endif()
# add new dist test # port range (20000, 21200) is reserved for dist-ops
if(WITH_DISTRIBUTE AND WITH_MULTINODE_TESTING) set(dist_ut_port 20001)
foreach(TEST_OP ${MULTINODE_DIST_TEST_OPS})
bash_test_modules(
${TEST_OP}
START_BASH
multinode_dist_test.sh
LABELS
"RUN_TYPE=EXCLUSIVE"
ENVS
"PADDLE_DIST_UT_PORT=${dist_ut_port}")
endforeach()
endif()
# port range (20000, 23000) is reserved for dist-ops
foreach(TEST_OP ${DIST_TEST_OPS}) foreach(TEST_OP ${DIST_TEST_OPS})
bash_test_modules( bash_test_modules(
${TEST_OP} ${TEST_OP}
...@@ -996,8 +736,8 @@ if(WITH_DISTRIBUTE) ...@@ -996,8 +736,8 @@ if(WITH_DISTRIBUTE)
"RUN_TYPE=EXCLUSIVE" "RUN_TYPE=EXCLUSIVE"
ENVS ENVS
"PADDLE_DIST_UT_PORT=${dist_ut_port}") "PADDLE_DIST_UT_PORT=${dist_ut_port}")
math(EXPR dist_ut_port "${dist_ut_port}+20") math(EXPR dist_ut_port "${dist_ut_port}+10")
if(dist_ut_port GREATER_EQUAL 22998) if(dist_ut_port GREATER_EQUAL 21198)
message( message(
FATAL_ERROR "available ports have been exhausted:${dist_ut_port}") FATAL_ERROR "available ports have been exhausted:${dist_ut_port}")
endif() endif()
...@@ -1023,20 +763,6 @@ if(WITH_DISTRIBUTE) ...@@ -1023,20 +763,6 @@ if(WITH_DISTRIBUTE)
"PADDLE_DIST_UT_PORT=${dist_ut_port}" "PADDLE_DIST_UT_PORT=${dist_ut_port}"
PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR})
endif() endif()
if(WITH_GPU
OR WITH_XPU
OR WITH_ASCEND
OR WITH_ASCEND_CL)
bash_test_modules(
test_new_group
START_BASH
test_new_group.sh
LABELS
"RUN_TYPE=EXCLUSIVE"
ENVS
"PADDLE_DIST_UT_PORT=${dist_ut_port}+20"
PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR})
endif()
endif() endif()
endif() endif()
...@@ -1230,9 +956,8 @@ endif() ...@@ -1230,9 +956,8 @@ endif()
# dist xpu tests: # dist xpu tests:
if(WITH_XPU_BKCL) if(WITH_XPU_BKCL)
#py_test(test_collective_reduce_api_xpu SRCS "test_collective_reduce_api.py")
py_test(test_collective_allreduce_api_xpu py_test(test_collective_allreduce_api_xpu
SRCS "test_collective_allreduce_api.py") SRCS "collective/test_collective_allreduce_api.py")
endif() endif()
if(WITH_HETERPS) if(WITH_HETERPS)
...@@ -1289,8 +1014,6 @@ set_tests_properties( ...@@ -1289,8 +1014,6 @@ set_tests_properties(
test_fetch_unmerged test_fetch_unmerged
test_buffer_shared_memory_reuse_pass test_buffer_shared_memory_reuse_pass
PROPERTIES LABELS "RUN_TYPE=DIST") PROPERTIES LABELS "RUN_TYPE=DIST")
# disable test_parallel_executor_fetch_isolated_var
# set_tests_properties(test_parallel_executor_fetch_isolated_var PROPERTIES LABELS "RUN_TYPE=DIST")
set_tests_properties( set_tests_properties(
test_parallel_executor_crf test_parallel_executor_crf
test_sync_batch_norm_op test_sync_batch_norm_op
...@@ -1311,7 +1034,6 @@ if(NOT WIN32 AND NOT APPLE) ...@@ -1311,7 +1034,6 @@ if(NOT WIN32 AND NOT APPLE)
PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
set_tests_properties(test_imperative_data_loader_fds_clear set_tests_properties(test_imperative_data_loader_fds_clear
PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
# set_tests_properties(test_imperative_data_loader_exception PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
set_tests_properties(test_multiprocess_dataloader_static set_tests_properties(test_multiprocess_dataloader_static
PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
set_tests_properties(test_multiprocess_dataloader_dynamic set_tests_properties(test_multiprocess_dataloader_dynamic
...@@ -1341,32 +1063,16 @@ if(NOT WIN32) ...@@ -1341,32 +1063,16 @@ if(NOT WIN32)
endif() endif()
endif() endif()
if(WITH_DISTRIBUTE AND NOT WIN32)
set_tests_properties(test_fleet_utils PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_cpu_barrier_with_gloo PROPERTIES TIMEOUT
40)
endif()
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
set_tests_properties(test_communicator_half_async PROPERTIES TIMEOUT 120)
set_tests_properties(test_dist_fleet_ctr2 PROPERTIES TIMEOUT 200) set_tests_properties(test_dist_fleet_ctr2 PROPERTIES TIMEOUT 200)
set_tests_properties(test_dist_fleet_sparse_embedding_ctr PROPERTIES TIMEOUT set_tests_properties(test_dist_fleet_sparse_embedding_ctr PROPERTIES TIMEOUT
200) 200)
set_tests_properties(test_dist_fleet_infer PROPERTIES TIMEOUT 200) set_tests_properties(test_dist_fleet_infer PROPERTIES TIMEOUT 200)
set_tests_properties(test_dist_fleet_raw_program_optimizer PROPERTIES TIMEOUT
120)
set_tests_properties(test_dist_fleet_raw_program_optimizer_fuse_allreduce set_tests_properties(test_dist_fleet_raw_program_optimizer_fuse_allreduce
PROPERTIES TIMEOUT 60) PROPERTIES TIMEOUT 60)
set_tests_properties(test_dist_dygraph_apis PROPERTIES TIMEOUT 120) set_tests_properties(test_dist_dygraph_apis PROPERTIES TIMEOUT 120)
endif() endif()
if(WITH_DISTRIBUTE AND NOT APPLE)
if(WITH_GPU OR WITH_ROCM)
set_tests_properties(test_c_comm_init_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 360)
endif()
endif()
# setting timeout value as 15S # setting timeout value as 15S
set_tests_properties(test_run PROPERTIES TIMEOUT 120) set_tests_properties(test_run PROPERTIES TIMEOUT 120)
set_tests_properties(test_sync_batch_norm_op PROPERTIES TIMEOUT 120) set_tests_properties(test_sync_batch_norm_op PROPERTIES TIMEOUT 120)
...@@ -1568,59 +1274,12 @@ set_tests_properties(test_graph_send_uv_op PROPERTIES TIMEOUT 60) ...@@ -1568,59 +1274,12 @@ set_tests_properties(test_graph_send_uv_op PROPERTIES TIMEOUT 60)
if(WITH_DISTRIBUTE if(WITH_DISTRIBUTE
AND WITH_GPU AND WITH_GPU
AND WITH_NCCL) AND WITH_NCCL)
set_tests_properties(test_parallel_dygraph_dataparallel PROPERTIES TIMEOUT
120)
set_tests_properties(test_parallel_dygraph_mnist PROPERTIES TIMEOUT 200)
set_tests_properties(test_parallel_dygraph_se_resnext PROPERTIES TIMEOUT 200)
set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT
350)
set_tests_properties(test_parallel_dygraph_control_flow PROPERTIES TIMEOUT
350)
set_tests_properties(test_parallel_dygraph_no_sync PROPERTIES TIMEOUT 300)
set_tests_properties(test_parallel_dygraph_no_sync_gradient_check
PROPERTIES TIMEOUT 60)
set_tests_properties(test_parallel_dygraph_pipeline_parallel
PROPERTIES TIMEOUT 500)
set_tests_properties( set_tests_properties(
test_parallel_dygraph_pipeline_parallel_with_virtual_stage test_parallel_dygraph_pipeline_parallel_with_virtual_stage
PROPERTIES TIMEOUT 500) PROPERTIES TIMEOUT 500)
set_tests_properties(test_parallel_dygraph_tensor_parallel PROPERTIES TIMEOUT
200)
set_tests_properties(test_parallel_dygraph_sharding_parallel
PROPERTIES TIMEOUT 120)
set_tests_properties(test_dygraph_sharding_optimizer_stage2 PROPERTIES TIMEOUT
120)
set_tests_properties(test_dygraph_sharding_stage2 PROPERTIES TIMEOUT 200)
set_tests_properties(test_dygraph_sharding_stage3 PROPERTIES TIMEOUT 350)
set_tests_properties(test_dygraph_sharding_stage3_for_eager PROPERTIES TIMEOUT
350)
set_tests_properties(test_dygraph_group_sharded_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_dygraph_group_sharded_api_for_eager
PROPERTIES TIMEOUT 120)
set_tests_properties(test_auto_parallel_parallelizer PROPERTIES TIMEOUT 120)
set_tests_properties(test_parallel_dygraph_mp_layers PROPERTIES TIMEOUT 120)
set_tests_properties(test_hybrid_parallel_inference_helper PROPERTIES TIMEOUT
120)
set_tests_properties(test_parallel_class_center_sample PROPERTIES TIMEOUT 120)
set_tests_properties(test_parallel_margin_cross_entropy PROPERTIES TIMEOUT
120)
set_tests_properties(test_auto_parallel_data_unshard PROPERTIES TIMEOUT 120) set_tests_properties(test_auto_parallel_data_unshard PROPERTIES TIMEOUT 120)
set_tests_properties(test_auto_parallel_save_load PROPERTIES TIMEOUT 120) set_tests_properties(test_auto_parallel_save_load PROPERTIES TIMEOUT 120)
set_tests_properties(test_auto_parallel_autoconvert PROPERTIES TIMEOUT 120) set_tests_properties(test_auto_parallel_autoconvert PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_process_group PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_alltoall_single PROPERTIES TIMEOUT 60)
set_tests_properties(test_eager_dist_api PROPERTIES TIMEOUT 100)
set_tests_properties(test_collective_batch_isend_irecv PROPERTIES TIMEOUT 100)
set_tests_properties(test_collective_reduce_scatter PROPERTIES TIMEOUT 100)
set_tests_properties(test_parallel_dygraph_qat PROPERTIES TIMEOUT 120)
if(${NCCL_VERSION} VERSION_GREATER_EQUAL 2212)
set_tests_properties(test_parallel_dygraph_sparse_embedding
PROPERTIES TIMEOUT 200)
set_tests_properties(test_parallel_dygraph_transformer PROPERTIES TIMEOUT
200)
set_tests_properties(test_parallel_dygraph_sparse_embedding_over_height
PROPERTIES TIMEOUT 150)
endif()
endif() endif()
if(APPLE) if(APPLE)
...@@ -1633,63 +1292,23 @@ if(APPLE) ...@@ -1633,63 +1292,23 @@ if(APPLE)
endif() endif()
if((WITH_ROCM OR WITH_GPU) AND NOT WIN32) if((WITH_ROCM OR WITH_GPU) AND NOT WIN32)
set_tests_properties(test_collective_allgather_api PROPERTIES TIMEOUT 300)
set_tests_properties(test_collective_allgather_object_api PROPERTIES TIMEOUT
120)
set_tests_properties(test_collective_alltoall_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_global_gather PROPERTIES TIMEOUT 200)
set_tests_properties(test_collective_global_scatter PROPERTIES TIMEOUT 200)
set_tests_properties(test_collective_sendrecv_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_broadcast_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_allreduce_api PROPERTIES TIMEOUT 120)
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
set_tests_properties(test_new_group_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_pipeline PROPERTIES TIMEOUT 120)
set_tests_properties(test_ir_pass_pipeline PROPERTIES TIMEOUT 120)
set_tests_properties(test_static_model_parallel PROPERTIES TIMEOUT 240)
set_tests_properties(test_static_model_parallel_fused_feedforward set_tests_properties(test_static_model_parallel_fused_feedforward
PROPERTIES TIMEOUT 120) PROPERTIES TIMEOUT 120)
set_tests_properties(test_static_model_parallel_fused_attention set_tests_properties(test_static_model_parallel_fused_attention
PROPERTIES TIMEOUT 120) PROPERTIES TIMEOUT 120)
set_tests_properties(test_static_model_parallel_fused_multi_transformer set_tests_properties(test_static_model_parallel_fused_multi_transformer
PROPERTIES TIMEOUT 120) PROPERTIES TIMEOUT 120)
set_tests_properties( set_tests_properties(test_pipeline_parallel PROPERTIES LABELS
test_collective_split_embedding_none_divisible "RUN_TYPE=DIST")
test_collective_split_row_linear set_tests_properties(test_reducescatter_api PROPERTIES TIMEOUT 120)
test_collective_split_col_linear set_tests_properties(test_reducescatter PROPERTIES TIMEOUT 120)
test_collective_scatter_api set_tests_properties(test_allgather PROPERTIES TIMEOUT 120)
test_collective_barrier_api
test_collective_reduce_api
test_pipeline_parallel
test_collective_allreduce_api
test_new_group_api
test_collective_broadcast_api
test_collective_allgather_api
test_collective_allgather_object_api
test_collective_alltoall_api
test_collective_global_gather
test_collective_global_scatter
PROPERTIES LABELS "RUN_TYPE=DIST")
endif() endif()
set_tests_properties(test_paddle_multiprocessing PROPERTIES TIMEOUT 120) set_tests_properties(test_paddle_multiprocessing PROPERTIES TIMEOUT 120)
set_tests_properties(test_reducescatter_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_reducescatter PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_reduce_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_pipeline_parallel PROPERTIES TIMEOUT 120) set_tests_properties(test_pipeline_parallel PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_reduce PROPERTIES TIMEOUT 120)
set_tests_properties(test_allgather PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_scatter_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_barrier_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_scatter PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_sendrecv PROPERTIES TIMEOUT 120)
endif() endif()
if(WITH_GPU OR WITH_ROCM) if(WITH_GPU OR WITH_ROCM)
set_tests_properties(test_imperative_auto_mixed_precision PROPERTIES TIMEOUT
300)
set_tests_properties(test_imperative_auto_mixed_precision_for_eager
PROPERTIES TIMEOUT 300)
set_tests_properties(test_parallel_dygraph_sync_batch_norm PROPERTIES TIMEOUT
120)
set_tests_properties(test_rank_attention_op PROPERTIES TIMEOUT 120) set_tests_properties(test_rank_attention_op PROPERTIES TIMEOUT 120)
endif() endif()
set_tests_properties(test_inplace_addto_strategy PROPERTIES TIMEOUT 120) set_tests_properties(test_inplace_addto_strategy PROPERTIES TIMEOUT 120)
......
...@@ -6,64 +6,271 @@ set(LOCAL_ALL_ARCH ON) ...@@ -6,64 +6,271 @@ set(LOCAL_ALL_ARCH ON)
set(LOCAL_ALL_PLAT ON) set(LOCAL_ALL_PLAT ON)
if((WITH_GPU OR WITH_ROCM) AND (LINUX)) if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules( py_test_modules(
test_allreduce test_allreduce MODULES test_allreduce ENVS
MODULES "PYTHONPATH=..:${PADDLE_BINARY_DIR}/python;http_proxy=;https_proxy=")
test_allreduce set_tests_properties(test_allreduce PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_broadcast MODULES test_broadcast ENVS
"PYTHONPATH=..:${PADDLE_BINARY_DIR}/python;http_proxy=;https_proxy=")
set_tests_properties(test_broadcast PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_c_concat MODULES test_c_concat ENVS
"PYTHONPATH=..:${PADDLE_BINARY_DIR}/python;http_proxy=;https_proxy=")
set_tests_properties(test_c_concat PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_c_identity MODULES test_c_identity ENVS
"PYTHONPATH=..:${PADDLE_BINARY_DIR}/python;http_proxy=;https_proxy=")
set_tests_properties(test_c_identity PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_c_split MODULES test_c_split ENVS
"PYTHONPATH=..:${PADDLE_BINARY_DIR}/python;http_proxy=;https_proxy=")
set_tests_properties(test_c_split PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
if((WITH_ROCM OR WITH_GPU) AND (LINUX))
bash_test_modules(
test_collective_split_embedding
START_BASH
../dist_test.sh
LABELS
"RUN_TYPE=DIST"
ENVS ENVS
"PADDLE_DIST_UT_PORT=20071;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python;http_proxy=;https_proxy=" "PADDLE_DIST_UT_PORT=21288;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python;http_proxy=;https_proxy="
) )
set_tests_properties(test_allreduce PROPERTIES TIMEOUT "120" RUN_SERIAL 1) set_tests_properties(test_collective_split_embedding PROPERTIES TIMEOUT "300"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_allgather_api MODULES test_collective_allgather_api ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_allgather_api PROPERTIES TIMEOUT "300"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_allgather_object_api MODULES
test_collective_allgather_object_api ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_allgather_object_api
PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif() endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX)) if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules( py_test_modules(
test_broadcast test_collective_allreduce_api MODULES test_collective_allreduce_api ENVS
MODULES "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
test_broadcast set_tests_properties(test_collective_allreduce_api PROPERTIES TIMEOUT "120"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_alltoall_api MODULES test_collective_alltoall_api ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_alltoall_api PROPERTIES TIMEOUT "120"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
bash_test_modules(
test_collective_alltoall_single
START_BASH
../dist_test.sh
LABELS
"RUN_TYPE=DIST"
ENVS ENVS
"PADDLE_DIST_UT_PORT=20073;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python;http_proxy=;https_proxy=" "PADDLE_DIST_UT_PORT=21290;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
) )
set_tests_properties(test_broadcast PROPERTIES TIMEOUT "120" RUN_SERIAL 1) set_tests_properties(test_collective_alltoall_single PROPERTIES TIMEOUT "350"
RUN_SERIAL 1)
endif() endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX)) if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules( py_test_modules(
test_c_concat test_collective_barrier_api MODULES test_collective_barrier_api ENVS
MODULES "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
test_c_concat set_tests_properties(test_collective_barrier_api PROPERTIES TIMEOUT "300"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
bash_test_modules(
test_collective_batch_isend_irecv
START_BASH
../dist_test.sh
LABELS
"RUN_TYPE=DIST"
ENVS ENVS
"PADDLE_DIST_UT_PORT=20075;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python;http_proxy=;https_proxy=" "PADDLE_DIST_UT_PORT=21292;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
) )
set_tests_properties(test_c_concat PROPERTIES TIMEOUT "120" RUN_SERIAL 1) set_tests_properties(test_collective_batch_isend_irecv
PROPERTIES TIMEOUT "350" RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_broadcast_api MODULES test_collective_broadcast_api ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_broadcast_api PROPERTIES TIMEOUT "120"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_cpu_barrier_with_gloo MODULES
test_collective_cpu_barrier_with_gloo ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_cpu_barrier_with_gloo
PROPERTIES TIMEOUT "300" RUN_SERIAL 1)
endif() endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX)) if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules( py_test_modules(
test_c_identity test_collective_global_gather MODULES test_collective_global_gather ENVS
MODULES "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
test_c_identity set_tests_properties(test_collective_global_gather PROPERTIES TIMEOUT "200"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_global_scatter MODULES test_collective_global_scatter ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_global_scatter PROPERTIES TIMEOUT "200"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_optimizer MODULES test_collective_optimizer ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_optimizer PROPERTIES TIMEOUT "300"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
bash_test_modules(
test_collective_process_group
START_BASH
../dist_test.sh
LABELS
"RUN_TYPE=DIST"
ENVS ENVS
"PADDLE_DIST_UT_PORT=20077;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python;http_proxy=;https_proxy=" "PADDLE_DIST_UT_PORT=21294;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
) )
set_tests_properties(test_c_identity PROPERTIES TIMEOUT "120" RUN_SERIAL 1) set_tests_properties(test_collective_process_group PROPERTIES TIMEOUT "350"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_reduce MODULES test_collective_reduce ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_reduce PROPERTIES TIMEOUT "300"
RUN_SERIAL 1)
endif() endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX)) if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules( py_test_modules(
test_c_split test_collective_reduce_api MODULES test_collective_reduce_api ENVS
MODULES "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
test_c_split set_tests_properties(test_collective_reduce_api PROPERTIES TIMEOUT "300"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
bash_test_modules(
test_collective_reduce_scatter
START_BASH
../dist_test.sh
LABELS
"RUN_TYPE=DIST"
ENVS ENVS
"PADDLE_DIST_UT_PORT=20079;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python;http_proxy=;https_proxy=" "PADDLE_DIST_UT_PORT=21296;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
) )
set_tests_properties(test_c_split PROPERTIES TIMEOUT "120" RUN_SERIAL 1) set_tests_properties(test_collective_reduce_scatter PROPERTIES TIMEOUT "350"
RUN_SERIAL 1)
endif() endif()
if((WITH_ROCM OR WITH_GPU) AND (LINUX)) if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_scatter MODULES test_collective_scatter ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_scatter PROPERTIES TIMEOUT "300"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_scatter_api MODULES test_collective_scatter_api ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_scatter_api PROPERTIES TIMEOUT "300"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_sendrecv MODULES test_collective_sendrecv ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_sendrecv PROPERTIES TIMEOUT "300"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_sendrecv_api MODULES test_collective_sendrecv_api ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_sendrecv_api PROPERTIES TIMEOUT "120"
RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_split_col_linear MODULES test_collective_split_col_linear
ENVS "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_split_col_linear
PROPERTIES TIMEOUT "300" RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_split_embedding_none_divisible MODULES
test_collective_split_embedding_none_divisible ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_split_embedding_none_divisible
PROPERTIES TIMEOUT "300" RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_split_row_linear MODULES test_collective_split_row_linear
ENVS "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_split_row_linear
PROPERTIES TIMEOUT "300" RUN_SERIAL 1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_wait MODULES test_collective_wait ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_wait PROPERTIES TIMEOUT "300" RUN_SERIAL
1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_eager_dist_api MODULES test_eager_dist_api ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_eager_dist_api PROPERTIES TIMEOUT "120" RUN_SERIAL
1)
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_new_group_api MODULES test_new_group_api ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_new_group_api PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
if((WITH_GPU
OR WITH_ROCM
OR WITH_ASCEND
OR WITH_ASCEND_CL
)
AND LOCAL_ALL_PLAT)
bash_test_modules( bash_test_modules(
test_collective_split_embedding test_gen_nccl_id_op
START_BASH START_BASH
../dist_test.sh ../dist_test.sh
LABELS LABELS
"RUN_TYPE=DIST" "RUN_TYPE=DIST"
ENVS ENVS
"PADDLE_DIST_UT_PORT=20081;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python;http_proxy=;https_proxy=" "PADDLE_DIST_UT_PORT=21298;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
) )
set_tests_properties(test_collective_split_embedding PROPERTIES TIMEOUT "300" set_tests_properties(test_gen_nccl_id_op PROPERTIES RUN_SERIAL 1)
RUN_SERIAL 1)
endif() endif()
add_subdirectory(fleet)
add_subdirectory(multinode)
...@@ -6,16 +6,15 @@ ...@@ -6,16 +6,15 @@
and specify the properties for the new unit test and specify the properties for the new unit test
the properties are the following: the properties are the following:
* `name`: the test's name * `name`: the test's name
* `os`: The supported operating system, ignoring case. If the test runs on multiple operating systems, use ";" to separate them; for example, `apple;linux` means the test runs on both Apple and Linux. The supported values are `linux`, `win32` and `apple`. If the value is empty, the test runs on all operating systems. * `os`: The supported operating system, ignoring case. If the test runs on multiple operating systems, use ";" to separate them; for example, `apple;linux` means the test runs on both Apple and Linux. The supported values are `linux`, `win32` and `apple`. If the value is empty, the test runs on all operating systems.
* `arch`: the device's architecture. Similar to `os`, multiple values are separated by ";" and case is ignored. The supported architectures are `gpu`, `xpu`, `npu` and `rocm`. * `arch`: the device's architecture. Similar to `os`, multiple values are separated by ";" and case is ignored. The supported architectures are `gpu`, `xpu`, `ASCEND`, `ASCEND_CL` and `rocm`.
* `timeout`: timeout of a unittest, whose unit is second. * `timeout`: timeout of a unittest, whose unit is second.
* `run_type`: run_type of a unittest. Supported values are `NIGHTLY`, `EXCLUSIVE`, `CINN`, `DIST`, `GPUPS`, `INFER`, `EXCLUSIVE:NIGHTLY`, `DIST:NIGHTLY`,which are case-insensitive. * `run_type`: run_type of a unittest. Supported values are `NIGHTLY`, `EXCLUSIVE`, `CINN`, `DIST`, `GPUPS`, `INFER`, `EXCLUSIVE:NIGHTLY`, `DIST:NIGHTLY`,which are case-insensitive.
* `launcher`: the test launcher.Supported values are test_runner.py, dist_test.sh and custom scripts' name. * `launcher`: the test launcher.Supported values are test_runner.py, dist_test.sh and custom scripts' name.
* `dist_ut_port`: the starting port used in a distributed unit test * `num_port`: the number of ports used in a distributed unit test
* `run_serial`: whether in serial mode. the value can be 1 or 0.Default (empty) is 0. * `run_serial`: whether in serial mode. the value can be 1 or 0.Default (empty) is 0.
* `ENVS`: required environment variables. Multiple variables are separated by ";". * `ENVS`: required environment variables. Multiple variables are separated by ";".
* `conditions`: extra required conditions for some tests. The value is a CMake boolean expression. * `conditions`: extra required conditions for some tests. The value is a list of CMake boolean expressions, separated by ";". For example, the value can be `WITH_DGC;NOT WITH_NCCL` or `WITH_NCCL;${NCCL_VERSION} VERSION_GREATER_EQUAL 2212`. The expressions are combined as a conjunction (all of them must hold).
### step 3. Generate CmakeLists.txt ### step 3. Generate CmakeLists.txt
Run the cmd: Run the cmd:
......
# This file is generated by ${PADDLE_ROOT}/tools/gen_ut_cmakelists.py.
# Please don't modify this file manually.
# If you need to change unittests in this file, please modify testslist.csv in the current directory
# and then run the command `python3 ${PADDLE_ROOT}/tools/gen_ut_cmakelists.py -f ${CURRENT_DIRECTORY}/testslist.csv`
# Always-ON placeholders. The conditions in this file use LOCAL_ALL_ARCH where
# a test has no architecture check and LOCAL_ALL_PLAT where it has no platform
# check, so every generated if() keeps the same "<arch> AND <platform>" shape.
set(LOCAL_ALL_ARCH ON)
set(LOCAL_ALL_PLAT ON)
# test_fleet_sharding_meta_optimizer: registered when any of the GPU, XPU or
# Ascend backends is enabled; no platform restriction. ENVS passes empty
# http_proxy/https_proxy values and a PYTHONPATH for the test. TIMEOUT is in
# seconds; RUN_SERIAL stops CTest from running it in parallel with other tests.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_fleet_sharding_meta_optimizer
    MODULES test_fleet_sharding_meta_optimizer
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_fleet_sharding_meta_optimizer
    PROPERTIES
      TIMEOUT "350"
      RUN_SERIAL 1
  )
endif()
# test_fleet_static_mp_layers: no architecture restriction, registered on Linux
# and Windows only. No TIMEOUT is set here; the test is marked RUN_SERIAL.
if((LINUX OR WIN32) AND LOCAL_ALL_ARCH)
  py_test_modules(
    test_fleet_static_mp_layers
    MODULES test_fleet_static_mp_layers
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_fleet_static_mp_layers
    PROPERTIES RUN_SERIAL 1
  )
endif()
# test_dgc_op: only registered when WITH_DGC is enabled; otherwise unrestricted
# in architecture and platform. Marked RUN_SERIAL, no explicit TIMEOUT.
if(WITH_DGC AND LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_dgc_op
    MODULES test_dgc_op
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_dgc_op
    PROPERTIES RUN_SERIAL 1
  )
endif()
# test_dgc_optimizer: only registered when WITH_DGC is enabled; otherwise
# unrestricted in architecture and platform. Marked RUN_SERIAL, no explicit
# TIMEOUT.
if(WITH_DGC AND LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_dgc_optimizer
    MODULES test_dgc_optimizer
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_dgc_optimizer
    PROPERTIES RUN_SERIAL 1
  )
endif()
# test_parallel_margin_cross_entropy: requires both WITH_NCCL and WITH_GPU; no
# platform restriction. Launched through ../../dist_test.sh with the DIST run
# type label and PADDLE_DIST_UT_PORT=21200 in its environment.
# TIMEOUT is in seconds.
if(WITH_NCCL AND WITH_GPU AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_margin_cross_entropy
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21200;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_parallel_margin_cross_entropy
    PROPERTIES
      TIMEOUT "120"
      RUN_SERIAL 1
  )
endif()
# test_dygraph_sharding_stage3: requires both WITH_NCCL and WITH_GPU; no
# platform restriction. Launched through ../../dist_test.sh with the DIST run
# type label and PADDLE_DIST_UT_PORT=21202 in its environment.
# TIMEOUT is in seconds.
if(WITH_NCCL AND WITH_GPU AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_dygraph_sharding_stage3
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21202;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_dygraph_sharding_stage3
    PROPERTIES
      TIMEOUT "350"
      RUN_SERIAL 1
  )
endif()
# test_parallel_dygraph_transformer (NCCL/GPU variant): registered only when
# WITH_NCCL is on, NCCL_VERSION is at least 2212, and WITH_GPU is on. Launched
# through ../../dist_test.sh with the DIST run type label and
# PADDLE_DIST_UT_PORT=21204 in its environment. No explicit TIMEOUT is set.
if(WITH_NCCL)
  # The expansion is quoted on purpose: an empty or undefined NCCL_VERSION
  # would otherwise vanish from the argument list and turn this if() into a
  # malformed expression that aborts configuration. Quoted, an empty value
  # simply compares as lower than 2212 and the test is skipped.
  if("${NCCL_VERSION}" VERSION_GREATER_EQUAL 2212)
    if((WITH_GPU) AND LOCAL_ALL_PLAT)
      bash_test_modules(
        test_parallel_dygraph_transformer
        START_BASH
        ../../dist_test.sh
        LABELS
        "RUN_TYPE=DIST"
        ENVS
        "PADDLE_DIST_UT_PORT=21204;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
      )
      # RUN_SERIAL keeps CTest from scheduling this test next to others.
      set_tests_properties(test_parallel_dygraph_transformer
                           PROPERTIES RUN_SERIAL 1)
    endif()
  endif()
endif()
# test_parallel_dygraph_transformer (ROCm variant): registered when WITH_ROCM
# is on; no platform restriction. Uses PADDLE_DIST_UT_PORT=21206.
# NOTE(review): this reuses the test name registered just above for
# WITH_NCCL + WITH_GPU builds; presumably WITH_GPU and WITH_ROCM are never
# enabled together - confirm, since the same name would then be added twice.
if(WITH_ROCM AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_transformer
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21206;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_parallel_dygraph_transformer
    PROPERTIES RUN_SERIAL 1
  )
endif()
# test_fleet_fp16_allreduce_meta_optimizer: no architecture restriction,
# registered on Linux and Windows only. Marked RUN_SERIAL, no explicit TIMEOUT.
if((LINUX OR WIN32) AND LOCAL_ALL_ARCH)
  py_test_modules(
    test_fleet_fp16_allreduce_meta_optimizer
    MODULES test_fleet_fp16_allreduce_meta_optimizer
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_fleet_fp16_allreduce_meta_optimizer
    PROPERTIES RUN_SERIAL 1
  )
endif()
# test_rnn_dp: registered when any of the GPU, XPU or Ascend backends is
# enabled; no platform restriction. Launched through ../../dist_test.sh with
# the DIST run type label and PADDLE_DIST_UT_PORT=21208 in its environment.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_rnn_dp
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21208;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_rnn_dp
    PROPERTIES RUN_SERIAL 1
  )
endif()
# test_parallel_dygraph_mp_layers: requires both WITH_NCCL and WITH_GPU; no
# platform restriction. Launched through ../../dist_test.sh with the DIST run
# type label and PADDLE_DIST_UT_PORT=21210 in its environment.
# TIMEOUT is in seconds.
if(WITH_NCCL AND WITH_GPU AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_mp_layers
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21210;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_parallel_dygraph_mp_layers
    PROPERTIES
      TIMEOUT "120"
      RUN_SERIAL 1
  )
endif()
# test_tcp_store: no architecture restriction, registered on Linux and Apple
# platforms only. Launched through ../../dist_test.sh with the DIST run type
# label and PADDLE_DIST_UT_PORT=21212 in its environment.
if((LINUX OR APPLE) AND LOCAL_ALL_ARCH)
  bash_test_modules(
    test_tcp_store
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21212;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_tcp_store
    PROPERTIES RUN_SERIAL 1
  )
endif()
# test_dygraph_sharding_stage3_for_eager: no architecture or platform
# restriction. Launched through ../../dist_test.sh with the DIST run type label
# and PADDLE_DIST_UT_PORT=21214 in its environment. TIMEOUT is in seconds.
if(LOCAL_ALL_PLAT AND LOCAL_ALL_ARCH)
  bash_test_modules(
    test_dygraph_sharding_stage3_for_eager
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21214;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_dygraph_sharding_stage3_for_eager
    PROPERTIES
      TIMEOUT "350"
      RUN_SERIAL 1
  )
endif()
# test_fleet_graph_execution_meta_optimizer: registered when any of the GPU,
# XPU or Ascend backends is enabled; no platform restriction. Launched through
# ../../dist_test.sh with the DIST run type label and
# PADDLE_DIST_UT_PORT=21216 in its environment.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_fleet_graph_execution_meta_optimizer
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21216;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_fleet_graph_execution_meta_optimizer
    PROPERTIES RUN_SERIAL 1
  )
endif()
# test_communicator_half_async: only registered when WITH_NCCL is enabled;
# otherwise unrestricted in architecture and platform. Besides the proxy and
# PYTHONPATH entries, ENVS sets FLAGS_communicator_send_queue_size and
# FLAGS_communicator_max_merge_var_num to 1. TIMEOUT is in seconds.
if(WITH_NCCL AND LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_communicator_half_async
    MODULES test_communicator_half_async
    ENVS "FLAGS_communicator_send_queue_size=1;FLAGS_communicator_max_merge_var_num=1;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_communicator_half_async
    PROPERTIES
      TIMEOUT "120"
      RUN_SERIAL 1
  )
endif()
# test_fleet_graph_executor: registered when any of the GPU, XPU or Ascend
# backends is enabled; no platform restriction. Marked RUN_SERIAL, no explicit
# TIMEOUT.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_fleet_graph_executor
    MODULES test_fleet_graph_executor
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_fleet_graph_executor
    PROPERTIES RUN_SERIAL 1
  )
endif()
# test_parallel_dygraph_pipeline_parallel: registered for WITH_GPU builds; no
# platform restriction. Launched through ../../dist_test.sh with the DIST run
# type label and PADDLE_DIST_UT_PORT=21218 in its environment.
# TIMEOUT is in seconds.
if(WITH_GPU AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_pipeline_parallel
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21218;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_parallel_dygraph_pipeline_parallel
    PROPERTIES
      TIMEOUT "500"
      RUN_SERIAL 1
  )
endif()
# test_fleet_localsgd_meta_optimizer: registered on Linux only, and only when
# any of the GPU, XPU or Ascend backends is enabled. Marked RUN_SERIAL, no
# explicit TIMEOUT.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LINUX)
  py_test_modules(
    test_fleet_localsgd_meta_optimizer
    MODULES test_fleet_localsgd_meta_optimizer
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(
    test_fleet_localsgd_meta_optimizer
    PROPERTIES RUN_SERIAL 1
  )
endif()
# test_parallel_class_center_sample: requires WITH_NCCL and WITH_GPU; started
# through ../../dist_test.sh on port 21220; 120 s CTest timeout, RUN_SERIAL.
if(WITH_NCCL AND WITH_GPU AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_class_center_sample
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21220;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_class_center_sample
                       PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_pipeline: started through ../../dist_test.sh on port 21222;
# 120 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_pipeline
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21222;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_pipeline PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_fleet_utils: Python module test on LINUX or APPLE;
# 120 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND (LINUX OR APPLE))
  py_test_modules(
    test_fleet_utils
    MODULES test_fleet_utils
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_utils PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_static_model_parallel: started through ../../dist_test.sh on port
# 21224; 240 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_static_model_parallel
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21224;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_static_model_parallel
                       PROPERTIES TIMEOUT "240" RUN_SERIAL 1)
endif()
# test_parallel_dygraph_no_sync: requires WITH_NCCL and WITH_GPU; started
# through ../../dist_test.sh on port 21226; 300 s CTest timeout, RUN_SERIAL.
if(WITH_NCCL AND WITH_GPU AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_no_sync
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21226;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_dygraph_no_sync
                       PROPERTIES TIMEOUT "300" RUN_SERIAL 1)
endif()
# test_dygraph_sharding_stage2: started through ../../dist_test.sh on port
# 21228; 200 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_dygraph_sharding_stage2
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21228;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_dygraph_sharding_stage2
                       PROPERTIES TIMEOUT "200" RUN_SERIAL 1)
endif()
# test_parallel_dygraph_control_flow: started through ../../dist_test.sh on
# port 21230; 350 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_control_flow
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21230;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_dygraph_control_flow
                       PROPERTIES TIMEOUT "350" RUN_SERIAL 1)
endif()
# test_fleet_lars_meta_optimizer: accelerator builds (GPU/XPU/ASCEND/ASCEND_CL);
# started through ../../dist_test.sh on port 21232; RUN_SERIAL.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_fleet_lars_meta_optimizer
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21232;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_lars_meta_optimizer PROPERTIES RUN_SERIAL 1)
endif()
# test_hybrid_parallel_inference_helper: started through ../../dist_test.sh on
# port 21234; 120 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_hybrid_parallel_inference_helper
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_hybrid_parallel_inference_helper
                       PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_fleet_rolemaker_new: Python module test; RUN_SERIAL, no explicit timeout.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_fleet_rolemaker_new
    MODULES test_fleet_rolemaker_new
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_rolemaker_new PROPERTIES RUN_SERIAL 1)
endif()
# test_dist_mnist_gradient_merge: Python module test for WITH_GPU or WITH_ROCM
# on LINUX or WIN32; 360 s CTest timeout, RUN_SERIAL.
if((WITH_GPU OR WITH_ROCM) AND (LINUX OR WIN32))
  py_test_modules(
    test_dist_mnist_gradient_merge
    MODULES test_dist_mnist_gradient_merge
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_dist_mnist_gradient_merge
                       PROPERTIES TIMEOUT "360" RUN_SERIAL 1)
endif()
# test_recv_save_op: Python module test; RUN_SERIAL, no explicit timeout.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_recv_save_op
    MODULES test_recv_save_op
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_recv_save_op PROPERTIES RUN_SERIAL 1)
endif()
# test_communicator_sync: Python module test with both communicator FLAGS set
# to 1 in its environment; RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_communicator_sync
    MODULES test_communicator_sync
    ENVS "FLAGS_communicator_send_queue_size=1;FLAGS_communicator_max_merge_var_num=1;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_communicator_sync PROPERTIES RUN_SERIAL 1)
endif()
# test_fleet_pipeline_meta_optimizer: accelerator builds
# (GPU/XPU/ASCEND/ASCEND_CL); started through ../../dist_test.sh on port
# 21236; RUN_SERIAL.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_fleet_pipeline_meta_optimizer
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21236;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_pipeline_meta_optimizer
                       PROPERTIES RUN_SERIAL 1)
endif()
# test_fleet_gradient_merge_meta_optimizer: Python module test for accelerator
# builds (GPU/XPU/ASCEND/ASCEND_CL); RUN_SERIAL.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_fleet_gradient_merge_meta_optimizer
    MODULES test_fleet_gradient_merge_meta_optimizer
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_gradient_merge_meta_optimizer
                       PROPERTIES RUN_SERIAL 1)
endif()
# test_fleet_amp_init: Python module test on LINUX or WIN32; RUN_SERIAL.
if(LOCAL_ALL_ARCH AND (LINUX OR WIN32))
  py_test_modules(
    test_fleet_amp_init
    MODULES test_fleet_amp_init
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_amp_init PROPERTIES RUN_SERIAL 1)
endif()
# test_dygraph_sharding_optimizer_stage2: started through ../../dist_test.sh
# on port 21238; 120 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_dygraph_sharding_optimizer_stage2
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21238;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_dygraph_sharding_optimizer_stage2
                       PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_fleet_meta_optimizer_base: Python module test on LINUX or WIN32;
# RUN_SERIAL.
if(LOCAL_ALL_ARCH AND (LINUX OR WIN32))
  py_test_modules(
    test_fleet_meta_optimizer_base
    MODULES test_fleet_meta_optimizer_base
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_meta_optimizer_base PROPERTIES RUN_SERIAL 1)
endif()
# test_fleet_raw_program_meta_optimizer: accelerator builds
# (GPU/XPU/ASCEND/ASCEND_CL); started through ../../dist_test.sh on port
# 21240; RUN_SERIAL.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_fleet_raw_program_meta_optimizer
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21240;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_raw_program_meta_optimizer
                       PROPERTIES RUN_SERIAL 1)
endif()
# test_parallel_dygraph_sharding_parallel: started through ../../dist_test.sh
# on port 21242; 120 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_sharding_parallel
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21242;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_dygraph_sharding_parallel
                       PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_parallel_dygraph_tensor_parallel: started through ../../dist_test.sh on
# port 21244; 200 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_tensor_parallel
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21244;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_dygraph_tensor_parallel
                       PROPERTIES TIMEOUT "200" RUN_SERIAL 1)
endif()
# test_dygraph_group_sharded_api_for_eager: started through
# ../../dist_test.sh on port 21246; 120 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_dygraph_group_sharded_api_for_eager
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21246;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_dygraph_group_sharded_api_for_eager
                       PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_fleet_distributed_strategy: Python module test on LINUX or WIN32;
# RUN_SERIAL.
if(LOCAL_ALL_ARCH AND (LINUX OR WIN32))
  py_test_modules(
    test_fleet_distributed_strategy
    MODULES test_fleet_distributed_strategy
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_distributed_strategy PROPERTIES RUN_SERIAL 1)
endif()
# test_fleet_dgc_meta_optimizer: Python module test, registered only when
# WITH_DGC is set; RUN_SERIAL.
if(WITH_DGC AND LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_fleet_dgc_meta_optimizer
    MODULES test_fleet_dgc_meta_optimizer
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_dgc_meta_optimizer PROPERTIES RUN_SERIAL 1)
endif()
# test_parallel_dygraph_unused_variables: started through ../../dist_test.sh
# on port 21248; 350 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_unused_variables
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21248;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_dygraph_unused_variables
                       PROPERTIES TIMEOUT "350" RUN_SERIAL 1)
endif()
# test_fleet_lamb_meta_optimizer: Python module test for accelerator builds
# (GPU/XPU/ASCEND/ASCEND_CL) on LINUX; RUN_SERIAL.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LINUX)
  py_test_modules(
    test_fleet_lamb_meta_optimizer
    MODULES test_fleet_lamb_meta_optimizer
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_lamb_meta_optimizer PROPERTIES RUN_SERIAL 1)
endif()
# test_dgc_momentum_op: Python module test, registered only when WITH_DGC is
# set; RUN_SERIAL.
if(WITH_DGC AND LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_dgc_momentum_op
    MODULES test_dgc_momentum_op
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_dgc_momentum_op PROPERTIES RUN_SERIAL 1)
endif()
# test_parallel_dygraph_no_sync_gradient_check: started through
# ../../dist_test.sh on port 21250; 60 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_no_sync_gradient_check
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21250;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_dygraph_no_sync_gradient_check
                       PROPERTIES TIMEOUT "60" RUN_SERIAL 1)
endif()
# test_fleet_pipeline_meta_optimizer_with_recompute: accelerator builds
# (GPU/XPU/ASCEND/ASCEND_CL); started through ../../dist_test.sh on port
# 21252; RUN_SERIAL.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_fleet_pipeline_meta_optimizer_with_recompute
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21252;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_pipeline_meta_optimizer_with_recompute
                       PROPERTIES RUN_SERIAL 1)
endif()
# test_fleet_hybrid_meta_optimizer: Python module test for accelerator builds
# (GPU/XPU/ASCEND/ASCEND_CL) on WIN32 or LINUX; RUN_SERIAL.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL)
   AND (WIN32 OR LINUX))
  py_test_modules(
    test_fleet_hybrid_meta_optimizer
    MODULES test_fleet_hybrid_meta_optimizer
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_hybrid_meta_optimizer
                       PROPERTIES RUN_SERIAL 1)
endif()
# test_parallel_dygraph_qat: started through ../../dist_test.sh on port 21254;
# 120 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_qat
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21254;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_dygraph_qat
                       PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_parallel_dygraph_sparse_embedding (NCCL variant): requires WITH_NCCL,
# an NCCL_VERSION of at least 2212 and WITH_GPU; started through
# ../../dist_test.sh on port 21256; 200 s CTest timeout, RUN_SERIAL.
if(WITH_NCCL)
  # The expansion is quoted so that an empty or undefined NCCL_VERSION makes
  # the comparison evaluate to false instead of leaving if() with a missing
  # left-hand operand (which is a configure-time error).
  if("${NCCL_VERSION}" VERSION_GREATER_EQUAL 2212)
    if(WITH_GPU AND LOCAL_ALL_PLAT)
      bash_test_modules(
        test_parallel_dygraph_sparse_embedding
        START_BASH ../../dist_test.sh
        LABELS "RUN_TYPE=DIST"
        ENVS "PADDLE_DIST_UT_PORT=21256;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
      set_tests_properties(test_parallel_dygraph_sparse_embedding
                           PROPERTIES TIMEOUT "200" RUN_SERIAL 1)
    endif()
  endif()
endif()
# test_parallel_dygraph_sparse_embedding (ROCM variant): WITH_ROCM only;
# started through ../../dist_test.sh on port 21258; 200 s CTest timeout,
# RUN_SERIAL.
if(WITH_ROCM AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_sparse_embedding
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21258;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_dygraph_sparse_embedding
                       PROPERTIES TIMEOUT "200" RUN_SERIAL 1)
endif()
# test_fleet_amp_meta_optimizer: Python module test for accelerator builds
# (GPU/XPU/ASCEND/ASCEND_CL); RUN_SERIAL.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_fleet_amp_meta_optimizer
    MODULES test_fleet_amp_meta_optimizer
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_amp_meta_optimizer PROPERTIES RUN_SERIAL 1)
endif()
# test_parallel_dygraph_sparse_embedding_over_height (NCCL variant): requires
# WITH_NCCL, an NCCL_VERSION of at least 2212 and WITH_GPU; started through
# ../../dist_test.sh on port 21260; 150 s CTest timeout, RUN_SERIAL.
if(WITH_NCCL)
  # The expansion is quoted so that an empty or undefined NCCL_VERSION makes
  # the comparison evaluate to false instead of leaving if() with a missing
  # left-hand operand (which is a configure-time error).
  if("${NCCL_VERSION}" VERSION_GREATER_EQUAL 2212)
    if(WITH_GPU AND LOCAL_ALL_PLAT)
      bash_test_modules(
        test_parallel_dygraph_sparse_embedding_over_height
        START_BASH ../../dist_test.sh
        LABELS "RUN_TYPE=DIST"
        ENVS "PADDLE_DIST_UT_PORT=21260;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
      set_tests_properties(test_parallel_dygraph_sparse_embedding_over_height
                           PROPERTIES TIMEOUT "150" RUN_SERIAL 1)
    endif()
  endif()
endif()
# test_parallel_dygraph_sparse_embedding_over_height (ROCM variant): WITH_ROCM
# only; started through ../../dist_test.sh on port 21262; 350 s CTest timeout,
# RUN_SERIAL.
if(WITH_ROCM AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_sparse_embedding_over_height
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21262;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_dygraph_sparse_embedding_over_height
                       PROPERTIES TIMEOUT "350" RUN_SERIAL 1)
endif()
# test_distributed_strategy: Python module test on LINUX or APPLE; RUN_SERIAL.
if(LOCAL_ALL_ARCH AND (LINUX OR APPLE))
  py_test_modules(
    test_distributed_strategy
    MODULES test_distributed_strategy
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_distributed_strategy PROPERTIES RUN_SERIAL 1)
endif()
# test_auto_parallel_parallelizer: started through ../../dist_test.sh on port
# 21264; 120 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_auto_parallel_parallelizer
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21264;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_auto_parallel_parallelizer
                       PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_fleet_recompute_meta_optimizer: Python module test for accelerator
# builds (GPU/XPU/ASCEND/ASCEND_CL) on LINUX or WIN32; RUN_SERIAL.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL)
   AND (LINUX OR WIN32))
  py_test_modules(
    test_fleet_recompute_meta_optimizer
    MODULES test_fleet_recompute_meta_optimizer
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_recompute_meta_optimizer
                       PROPERTIES RUN_SERIAL 1)
endif()
# test_dygraph_group_sharded_api: started through ../../dist_test.sh on port
# 21266; 120 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_dygraph_group_sharded_api
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21266;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_dygraph_group_sharded_api
                       PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_fleet_private_function: Python module test on LINUX or WIN32;
# RUN_SERIAL.
if(LOCAL_ALL_ARCH AND (LINUX OR WIN32))
  py_test_modules(
    test_fleet_private_function
    MODULES test_fleet_private_function
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_fleet_private_function PROPERTIES RUN_SERIAL 1)
endif()
# test_new_group: accelerator builds (GPU/XPU/ASCEND/ASCEND_CL); started by
# its own script test_new_group.sh on port 21268. Unlike the dist_test.sh
# entries, its ENVS carry no PYTHONPATH. RUN_SERIAL.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_new_group
    START_BASH test_new_group.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21268;http_proxy=;https_proxy=")
  set_tests_properties(test_new_group PROPERTIES RUN_SERIAL 1)
endif()
# test_c_comm_init_op: accelerator builds (GPU/XPU/ASCEND/ASCEND_CL) on LINUX;
# started by its own script test_c_comm_init_op.sh on port 21270, with no
# PYTHONPATH in ENVS; 120 s CTest timeout, RUN_SERIAL.
if((WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) AND LINUX)
  bash_test_modules(
    test_c_comm_init_op
    START_BASH test_c_comm_init_op.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21270;http_proxy=;https_proxy=")
  set_tests_properties(test_c_comm_init_op
                       PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_ir_pass_pipeline: started through ../../dist_test.sh on port 21272;
# 120 s CTest timeout, RUN_SERIAL.
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_ir_pass_pipeline
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21272;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_ir_pass_pipeline
                       PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_parallel_dygraph_mnist: WITH_GPU or WITH_ROCM; started through
# ../../dist_test.sh on port 21274; 200 s CTest timeout, RUN_SERIAL.
if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_mnist
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_dygraph_mnist
                       PROPERTIES TIMEOUT "200" RUN_SERIAL 1)
endif()
# test_parallel_dygraph_se_resnext: WITH_GPU or WITH_ROCM; started through
# ../../dist_test.sh on port 21276; 200 s CTest timeout, RUN_SERIAL.
if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_parallel_dygraph_se_resnext
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21276;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_dygraph_se_resnext
                       PROPERTIES TIMEOUT "200" RUN_SERIAL 1)
endif()
# test_parallel_dygraph_sync_batch_norm: Python module test for WITH_GPU or
# WITH_ROCM; 120 s CTest timeout, RUN_SERIAL.
if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_parallel_dygraph_sync_batch_norm
    MODULES test_parallel_dygraph_sync_batch_norm
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_parallel_dygraph_sync_batch_norm
                       PROPERTIES TIMEOUT "120" RUN_SERIAL 1)
endif()
# test_imperative_auto_mixed_precision: Python module test for WITH_GPU or
# WITH_ROCM; 300 s CTest timeout, RUN_SERIAL.
if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_imperative_auto_mixed_precision
    MODULES test_imperative_auto_mixed_precision
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_imperative_auto_mixed_precision
                       PROPERTIES TIMEOUT "300" RUN_SERIAL 1)
endif()
# test_imperative_auto_mixed_precision_for_eager: Python module test for
# WITH_GPU or WITH_ROCM; 300 s CTest timeout, RUN_SERIAL.
if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_imperative_auto_mixed_precision_for_eager
    MODULES test_imperative_auto_mixed_precision_for_eager
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_imperative_auto_mixed_precision_for_eager
                       PROPERTIES TIMEOUT "300" RUN_SERIAL 1)
endif()
# test_mixed_precision: Python module test for WITH_GPU or WITH_ROCM;
# RUN_SERIAL, no explicit timeout.
if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_mixed_precision
    MODULES test_mixed_precision
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_mixed_precision PROPERTIES RUN_SERIAL 1)
endif()
# test_dygraph_recompute: Python module test for WITH_GPU or WITH_ROCM;
# RUN_SERIAL, no explicit timeout.
if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_dygraph_recompute
    MODULES test_dygraph_recompute
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_dygraph_recompute PROPERTIES RUN_SERIAL 1)
endif()
# test_dygraph_recompute_for_eager: Python module test for WITH_GPU or
# WITH_ROCM; RUN_SERIAL, no explicit timeout.
if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
  py_test_modules(
    test_dygraph_recompute_for_eager
    MODULES test_dygraph_recompute_for_eager
    ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_dygraph_recompute_for_eager
                       PROPERTIES RUN_SERIAL 1)
endif()
# test_dist_mnist_dgc_nccl: requires (WITH_NCCL or WITH_RCCL) and WITH_DGC;
# started through ../../dist_test.sh on port 21278; RUN_SERIAL.
if((WITH_NCCL OR WITH_RCCL)
   AND WITH_DGC
   AND LOCAL_ALL_ARCH
   AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_dist_mnist_dgc_nccl
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21278;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_dist_mnist_dgc_nccl PROPERTIES RUN_SERIAL 1)
endif()
# test_dist_se_resnext_dgc: requires (WITH_NCCL or WITH_RCCL) and WITH_DGC;
# started through ../../dist_test.sh on port 21280; RUN_SERIAL.
if((WITH_NCCL OR WITH_RCCL)
   AND WITH_DGC
   AND LOCAL_ALL_ARCH
   AND LOCAL_ALL_PLAT)
  bash_test_modules(
    test_dist_se_resnext_dgc
    START_BASH ../../dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21280;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
  set_tests_properties(test_dist_se_resnext_dgc PROPERTIES RUN_SERIAL 1)
endif()
...@@ -49,7 +49,7 @@ class TestDistMnistNCCL2DGC(TestDistBase): ...@@ -49,7 +49,7 @@ class TestDistMnistNCCL2DGC(TestDistBase):
def test_dist_train(self): def test_dist_train(self):
import paddle.fluid as fluid import paddle.fluid as fluid
if fluid.core.is_compiled_with_cuda(): if fluid.core.is_compiled_with_cuda():
self.check_with_place("dist_mnist.py", self.check_with_place(os.path.abspath("../../dist_mnist.py"),
delta=1e-5, delta=1e-5,
check_error_log=True, check_error_log=True,
log_name=flag_name) log_name=flag_name)
...@@ -80,10 +80,11 @@ class TestDistMnistNCCL2DGCMultiCards(TestDistBase): ...@@ -80,10 +80,11 @@ class TestDistMnistNCCL2DGCMultiCards(TestDistBase):
def test_dist_train(self): def test_dist_train(self):
import paddle.fluid as fluid import paddle.fluid as fluid
if fluid.core.is_compiled_with_cuda(): if fluid.core.is_compiled_with_cuda():
self.check_with_place_multi_cards("dist_mnist.py", self.check_with_place_multi_cards(
delta=1e-5, os.path.abspath("../../dist_mnist.py"),
check_error_log=True, delta=1e-5,
log_name=flag_name) check_error_log=True,
log_name=flag_name)
def tearDown(self): def tearDown(self):
import paddle.fluid as fluid import paddle.fluid as fluid
......
...@@ -17,8 +17,6 @@ import unittest ...@@ -17,8 +17,6 @@ import unittest
from test_dist_base import TestDistBase from test_dist_base import TestDistBase
import os import os
import os
flag_name = os.path.splitext(__file__)[0] flag_name = os.path.splitext(__file__)[0]
...@@ -35,7 +33,7 @@ class TestDistSeResnetNCCL2DGC(TestDistBase): ...@@ -35,7 +33,7 @@ class TestDistSeResnetNCCL2DGC(TestDistBase):
def test_dist_train(self): def test_dist_train(self):
import paddle.fluid as fluid import paddle.fluid as fluid
if fluid.core.is_compiled_with_cuda(): if fluid.core.is_compiled_with_cuda():
self.check_with_place("dist_se_resnext.py", self.check_with_place(os.path.abspath("../../dist_se_resnext.py"),
delta=30, delta=30,
check_error_log=True, check_error_log=True,
log_name=flag_name) log_name=flag_name)
......
...@@ -36,10 +36,11 @@ class TestParallelDygraphMnist(TestDistBase): ...@@ -36,10 +36,11 @@ class TestParallelDygraphMnist(TestDistBase):
def test_mnist(self): def test_mnist(self):
if fluid.core.is_compiled_with_cuda(): if fluid.core.is_compiled_with_cuda():
self.check_with_place("parallel_dygraph_mnist.py", self.check_with_place(
delta=1e-5, os.path.abspath("../../parallel_dygraph_mnist.py"),
check_error_log=True, delta=1e-5,
log_name=flag_name) check_error_log=True,
log_name=flag_name)
#TODO(liuyuhui): Multi-Card Baidu Kunlun XPU training exist accuracy problems #TODO(liuyuhui): Multi-Card Baidu Kunlun XPU training exist accuracy problems
...@@ -55,10 +56,11 @@ class TestParallelDygraphMnistXPU(TestDistBase): ...@@ -55,10 +56,11 @@ class TestParallelDygraphMnistXPU(TestDistBase):
def test_mnist_xpu(self): def test_mnist_xpu(self):
if fluid.core.is_compiled_with_xpu(): if fluid.core.is_compiled_with_xpu():
self.check_with_place("parallel_dygraph_mnist.py", self.check_with_place(
delta=1e-4, os.path.abspath("../../parallel_dygraph_mnist.py"),
check_error_log=True, delta=1e-4,
log_name=flag_name) check_error_log=True,
log_name=flag_name)
class TestParallelDygraphMnistSpawn(TestDistSpawnRunner): class TestParallelDygraphMnistSpawn(TestDistSpawnRunner):
...@@ -80,10 +82,11 @@ class TestParallelDygraphMnistAccGrad(TestDistBase): ...@@ -80,10 +82,11 @@ class TestParallelDygraphMnistAccGrad(TestDistBase):
def test_mnist(self): def test_mnist(self):
if fluid.core.is_compiled_with_cuda(): if fluid.core.is_compiled_with_cuda():
self.check_with_place("parallel_dygraph_mnist.py", self.check_with_place(
delta=1e-5, os.path.abspath("../../parallel_dygraph_mnist.py"),
check_error_log=True, delta=1e-5,
log_name=flag_name) check_error_log=True,
log_name=flag_name)
class TestFleetDygraphMnistXPU(TestDistBase): class TestFleetDygraphMnistXPU(TestDistBase):
...@@ -97,10 +100,11 @@ class TestFleetDygraphMnistXPU(TestDistBase): ...@@ -97,10 +100,11 @@ class TestFleetDygraphMnistXPU(TestDistBase):
def test_mnist(self): def test_mnist(self):
if fluid.core.is_compiled_with_xpu(): if fluid.core.is_compiled_with_xpu():
self.check_with_place("parallel_dygraph_mnist.py", self.check_with_place(
delta=1e-4, os.path.abspath("../../parallel_dygraph_mnist.py"),
check_error_log=True, delta=1e-4,
log_name=flag_name) check_error_log=True,
log_name=flag_name)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -24,8 +24,11 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus ...@@ -24,8 +24,11 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus
class TestHybridPipeParallel(TestMultipleGpus): class TestHybridPipeParallel(TestMultipleGpus):
def test_hybrid_parallel_pp_layer(self): def test_hybrid_parallel_pp_layer(self):
self.run_mnist_2gpu('hybrid_parallel_pp_layer.py') self.run_mnist_2gpu(
self.run_mnist_2gpu('hybrid_parallel_pp_layer.py', eager_mode=False) os.path.abspath('../../hybrid_parallel_pp_layer.py'))
self.run_mnist_2gpu(
os.path.abspath('../../hybrid_parallel_pp_layer.py'),
eager_mode=False)
def test_hybrid_parallel_pp_tuple_inputs(self): def test_hybrid_parallel_pp_tuple_inputs(self):
self.run_mnist_2gpu('hybrid_parallel_pp_embedding.py') self.run_mnist_2gpu('hybrid_parallel_pp_embedding.py')
......
...@@ -36,10 +36,11 @@ class TestParallelDygraphSparseEmdedding(TestDistBase): ...@@ -36,10 +36,11 @@ class TestParallelDygraphSparseEmdedding(TestDistBase):
def test_sparse_embedding(self): def test_sparse_embedding(self):
if fluid.core.is_compiled_with_cuda(): if fluid.core.is_compiled_with_cuda():
self.check_with_place("parallel_dygraph_sparse_embedding.py", self.check_with_place(
delta=1e-5, os.path.abspath("../../parallel_dygraph_sparse_embedding.py"),
check_error_log=True, delta=1e-5,
log_name=flag_name) check_error_log=True,
log_name=flag_name)
class TestParallelDygraphSparseEmdeddingFP64(TestDistBase): class TestParallelDygraphSparseEmdeddingFP64(TestDistBase):
...@@ -51,7 +52,8 @@ class TestParallelDygraphSparseEmdeddingFP64(TestDistBase): ...@@ -51,7 +52,8 @@ class TestParallelDygraphSparseEmdeddingFP64(TestDistBase):
def test_sparse_embedding_fp64(self): def test_sparse_embedding_fp64(self):
if fluid.core.is_compiled_with_cuda(): if fluid.core.is_compiled_with_cuda():
self.check_with_place("parallel_dygraph_sparse_embedding_fp64.py", self.check_with_place(os.path.abspath(
"../../parallel_dygraph_sparse_embedding_fp64.py"),
delta=1e-5, delta=1e-5,
check_error_log=True, check_error_log=True,
log_name=flag_name) log_name=flag_name)
......
...@@ -35,11 +35,11 @@ class TestParallelDygraphSparseEmdeddingOverHeight(TestDistBase): ...@@ -35,11 +35,11 @@ class TestParallelDygraphSparseEmdeddingOverHeight(TestDistBase):
def test_sparse_embedding(self): def test_sparse_embedding(self):
if fluid.core.is_compiled_with_cuda(): if fluid.core.is_compiled_with_cuda():
self.check_with_place( self.check_with_place(os.path.abspath(
"parallel_dygraph_sparse_embedding_over_height.py", "../../parallel_dygraph_sparse_embedding_over_height.py"),
delta=1e-5, delta=1e-5,
check_error_log=True, check_error_log=True,
log_name=flag_name) log_name=flag_name)
class TestParallelDygraphSparseEmdeddingOverHeightSpawn(TestDistSpawnRunner): class TestParallelDygraphSparseEmdeddingOverHeightSpawn(TestDistSpawnRunner):
......
...@@ -35,10 +35,11 @@ class TestParallelDygraphUnusedVar(TestDistBase): ...@@ -35,10 +35,11 @@ class TestParallelDygraphUnusedVar(TestDistBase):
def test_net(self): def test_net(self):
if fluid.core.is_compiled_with_cuda(): if fluid.core.is_compiled_with_cuda():
self.check_with_place("parallel_dygraph_unused_variables.py", self.check_with_place(
delta=1e-5, os.path.abspath("../../parallel_dygraph_unused_variables.py"),
check_error_log=True, delta=1e-5,
log_name=flag_name) check_error_log=True,
log_name=flag_name)
class TestFleetDygraphUnusedVar(TestParallelDygraphUnusedVar): class TestFleetDygraphUnusedVar(TestParallelDygraphUnusedVar):
...@@ -67,10 +68,11 @@ class TestParallelDygraphNoVar(TestDistBase): ...@@ -67,10 +68,11 @@ class TestParallelDygraphNoVar(TestDistBase):
def test_net(self): def test_net(self):
if fluid.core.is_compiled_with_cuda(): if fluid.core.is_compiled_with_cuda():
self.check_with_place("parallel_dygraph_none_var.py", self.check_with_place(
delta=1e-5, os.path.abspath("../../parallel_dygraph_none_var.py"),
check_error_log=True, delta=1e-5,
log_name=flag_name) check_error_log=True,
log_name=flag_name)
class TestParallelDygraphSharedUnusedVariables(TestDistBase): class TestParallelDygraphSharedUnusedVariables(TestDistBase):
...@@ -82,10 +84,11 @@ class TestParallelDygraphSharedUnusedVariables(TestDistBase): ...@@ -82,10 +84,11 @@ class TestParallelDygraphSharedUnusedVariables(TestDistBase):
def test_mnist(self): def test_mnist(self):
if fluid.core.is_compiled_with_cuda(): if fluid.core.is_compiled_with_cuda():
self.check_with_place("parallel_dygraph_shared_unused_var.py", self.check_with_place(
delta=1e-5, os.path.abspath("../../parallel_dygraph_shared_unused_var.py"),
check_error_log=True, delta=1e-5,
log_name=flag_name) check_error_log=True,
log_name=flag_name)
if __name__ == "__main__": if __name__ == "__main__":
......
name,os,arch,timeout,run_type,launcher,num_port,run_serial,envs,conditions
test_fleet_sharding_meta_optimizer,,GPU;XPU;ASCEND;ASCEND_CL,350,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_static_mp_layers,linux;win32,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_dgc_op,,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_DGC
test_dgc_optimizer,,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_DGC
test_parallel_margin_cross_entropy,,GPU,120,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_NCCL
test_dygraph_sharding_stage3,,GPU,350,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_NCCL
test_parallel_dygraph_transformer,,GPU,,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_NCCL;${NCCL_VERSION} VERSION_GREATER_EQUAL 2212
test_parallel_dygraph_transformer,,ROCM,,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_fp16_allreduce_meta_optimizer,LINUX;WIN32,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_rnn_dp,,GPU;XPU;ASCEND;ASCEND_CL,,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_dygraph_mp_layers,,GPU,120,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_NCCL
test_tcp_store,LINUX;APPLE,,,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_dygraph_sharding_stage3_for_eager,,,350,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_graph_execution_meta_optimizer,,GPU;XPU;ASCEND;ASCEND_CL,,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_communicator_half_async,,,120,DIST,test_runner.py,2,1,FLAGS_communicator_send_queue_size=1;FLAGS_communicator_max_merge_var_num=1;http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_NCCL
test_fleet_graph_executor,,GPU;XPU;ASCEND;ASCEND_CL,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_dygraph_pipeline_parallel,,GPU,500,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_localsgd_meta_optimizer,LINUX,GPU;XPU;ASCEND;ASCEND_CL,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_class_center_sample,,GPU,120,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_NCCL
test_pipeline,,,120,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_utils,LINUX;APPLE,,120,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_static_model_parallel,,,240,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_dygraph_no_sync,,GPU,300,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_NCCL
test_dygraph_sharding_stage2,,,200,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_dygraph_control_flow,,,350,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_lars_meta_optimizer,,GPU;XPU;ASCEND;ASCEND_CL,,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_hybrid_parallel_inference_helper,,,120,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_rolemaker_new,,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_dist_mnist_gradient_merge,LINUX;WIN32,GPU;ROCM,360,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_recv_save_op,,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_communicator_sync,,,,DIST,test_runner.py,2,1,FLAGS_communicator_send_queue_size=1;FLAGS_communicator_max_merge_var_num=1;http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_pipeline_meta_optimizer,,GPU;XPU;ASCEND;ASCEND_CL,,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_gradient_merge_meta_optimizer,,GPU;XPU;ASCEND;ASCEND_CL,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_amp_init,linux;win32,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_dygraph_sharding_optimizer_stage2,,,120,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_meta_optimizer_base,linux;win32,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_raw_program_meta_optimizer,,GPU;XPU;ASCEND;ASCEND_CL,,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_dygraph_sharding_parallel,,,120,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_dygraph_tensor_parallel,,,200,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_dygraph_group_sharded_api_for_eager,,,120,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_distributed_strategy,linux;win32,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_dgc_meta_optimizer,,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_DGC
test_parallel_dygraph_unused_variables,,,350,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_lamb_meta_optimizer,linux,GPU;XPU;ASCEND;ASCEND_CL,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_dgc_momentum_op,,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_DGC
test_parallel_dygraph_no_sync_gradient_check,,,60,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_pipeline_meta_optimizer_with_recompute,,GPU;XPU;ASCEND;ASCEND_CL,,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_hybrid_meta_optimizer,WIN32;LINUX,GPU;XPU;ASCEND;ASCEND_CL,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_dygraph_qat,,,120,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_dygraph_sparse_embedding,,GPU,200,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_NCCL;${NCCL_VERSION} VERSION_GREATER_EQUAL 2212
test_parallel_dygraph_sparse_embedding,,ROCM,200,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_amp_meta_optimizer,,GPU;XPU;ASCEND;ASCEND_CL,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_dygraph_sparse_embedding_over_height,,GPU,150,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_NCCL;${NCCL_VERSION} VERSION_GREATER_EQUAL 2212
test_parallel_dygraph_sparse_embedding_over_height,,ROCM,350,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_distributed_strategy,LINUX;APPLE,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_auto_parallel_parallelizer,,,120,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_recompute_meta_optimizer,linux;win32,GPU;XPU;ASCEND;ASCEND_CL,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_dygraph_group_sharded_api,,,120,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_fleet_private_function,linux;win32,,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_new_group,,GPU;XPU;ASCEND;ASCEND_CL,,DIST,test_new_group.sh,2,1,http_proxy=;https_proxy=,
test_c_comm_init_op,linux,GPU;XPU;ASCEND;ASCEND_CL,120,DIST,test_c_comm_init_op.sh,2,1,http_proxy=;https_proxy=,
test_ir_pass_pipeline,,,120,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_dygraph_mnist,,GPU;ROCM,200,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_dygraph_se_resnext,,GPU;ROCM,200,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_parallel_dygraph_sync_batch_norm,,GPU;ROCM,120,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_imperative_auto_mixed_precision,,GPU;ROCM,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_imperative_auto_mixed_precision_for_eager,,GPU;ROCM,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_mixed_precision,,GPU;ROCM,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_dygraph_recompute,,GPU;ROCM,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_dygraph_recompute_for_eager,,GPU;ROCM,,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,
test_dist_mnist_dgc_nccl,,,,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_NCCL OR WITH_RCCL;WITH_DGC
test_dist_se_resnext_dgc,,,,DIST,../../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=../..,WITH_NCCL OR WITH_RCCL;WITH_DGC
# This file is generated by ${PADDLE_ROOT}/tools/gen_ut_cmakelists.py.
# Please don't modify this file manually.
# If you need to change unittests in this file, please modify testslist.csv in the current directory
# and then run the command `python3 ${PADDLE_ROOT}/tools/gen_ut_cmakelists.py -f ${CURRENT_DIRECTORY}/testslist.csv`
set(LOCAL_ALL_ARCH ON)
set(LOCAL_ALL_PLAT ON)
# Registered only when multi-node testing is enabled on a Linux build with GPU
# support; the test is launched through multinode_dist_test.sh.
if(WITH_MULTINODE_TESTING AND WITH_GPU AND LINUX)
  bash_test_modules(
    test_multinode_dygraph_hybrid_dpppmp
    START_BASH multinode_dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21282;http_proxy=;https_proxy=")
  set_tests_properties(
    test_multinode_dygraph_hybrid_dpppmp
    PROPERTIES
      TIMEOUT "120"
      RUN_SERIAL 1)
endif()
# Registered only when multi-node testing is enabled on a Linux build with GPU
# support; the test is launched through multinode_dist_test.sh.
if(WITH_MULTINODE_TESTING AND WITH_GPU AND LINUX)
  bash_test_modules(
    test_multinode_dygraph_hybrid_dp
    START_BASH multinode_dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21284;http_proxy=;https_proxy=")
  set_tests_properties(
    test_multinode_dygraph_hybrid_dp
    PROPERTIES
      TIMEOUT "120"
      RUN_SERIAL 1)
endif()
# Registered only when multi-node testing is enabled on a Linux build with GPU
# support; the test is launched through multinode_dist_test.sh.
if(WITH_MULTINODE_TESTING AND WITH_GPU AND LINUX)
  bash_test_modules(
    test_multinode_dygraph_sharding
    START_BASH multinode_dist_test.sh
    LABELS "RUN_TYPE=DIST"
    ENVS "PADDLE_DIST_UT_PORT=21286;http_proxy=;https_proxy=")
  set_tests_properties(
    test_multinode_dygraph_sharding
    PROPERTIES
      TIMEOUT "120"
      RUN_SERIAL 1)
endif()
name,os,arch,timeout,run_type,launcher,num_port,run_serial,ENVS,conditions
test_multinode_dygraph_hybrid_dpppmp,linux,gpu,120,DIST,multinode_dist_test.sh,8,1,http_proxy=;https_proxy=,WITH_MULTINODE_TESTING
test_multinode_dygraph_hybrid_dp,linux,gpu,120,DIST,multinode_dist_test.sh,8,1,http_proxy=;https_proxy=,WITH_MULTINODE_TESTING
test_multinode_dygraph_sharding,linux,gpu,120,DIST,multinode_dist_test.sh,8,1,http_proxy=;https_proxy=,WITH_MULTINODE_TESTING
name,os,arch,timeout,run_type,launcher,dist_ut_port,run_serial,ENVS,conditions name,os,arch,timeout,run_type,launcher,num_port,run_serial,ENVS,conditions
test_allreduce,linux,gpu;rocm,120,DIST,test_runner.py,20071,1,PYTHONPATH=..;http_proxy=;https_proxy=, test_allreduce,linux,gpu;rocm,120,DIST,test_runner.py,2,1,PYTHONPATH=..;http_proxy=;https_proxy=,
test_broadcast,linux,gpu;rocm,120,DIST,test_runner.py,20073,1,PYTHONPATH=..;http_proxy=;https_proxy=, test_broadcast,linux,gpu;rocm,120,DIST,test_runner.py,2,1,PYTHONPATH=..;http_proxy=;https_proxy=,
test_c_concat,linux,gpu;rocm,120,DIST,test_runner.py,20075,1,PYTHONPATH=..;http_proxy=;https_proxy=, test_c_concat,linux,gpu;rocm,120,DIST,test_runner.py,2,1,PYTHONPATH=..;http_proxy=;https_proxy=,
test_c_identity,linux,gpu;rocm,120,DIST,test_runner.py,20077,1,PYTHONPATH=..;http_proxy=;https_proxy=, test_c_identity,linux,gpu;rocm,120,DIST,test_runner.py,2,1,PYTHONPATH=..;http_proxy=;https_proxy=,
test_c_split,linux,gpu;rocm,120,DIST,test_runner.py,20079,1,PYTHONPATH=..;http_proxy=;https_proxy=, test_c_split,linux,gpu;rocm,120,DIST,test_runner.py,2,1,PYTHONPATH=..;http_proxy=;https_proxy=,
test_collective_split_embedding,linux,rocm;gpu,300,DIST,../dist_test.sh,20081,1,PYTHONPATH=..;http_proxy=;https_proxy=, test_collective_split_embedding,linux,rocm;gpu,300,DIST,../dist_test.sh,2,1,PYTHONPATH=..;http_proxy=;https_proxy=,
test_collective_allgather_api,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_allgather_object_api,linux,gpu;rocm,120,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_allreduce_api,linux,gpu;rocm,120,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_alltoall_api,linux,gpu;rocm,120,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_alltoall_single,linux,gpu;rocm,350,DIST,../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_barrier_api,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_batch_isend_irecv,linux,gpu;rocm,350,DIST,../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_broadcast_api,linux,gpu;rocm,120,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_cpu_barrier_with_gloo,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_global_gather,linux,gpu;rocm,200,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_global_scatter,linux,gpu;rocm,200,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_optimizer,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_process_group,linux,gpu;rocm,350,DIST,../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_reduce,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_reduce_api,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_reduce_scatter,linux,gpu;rocm,350,DIST,../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_scatter,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_scatter_api,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_sendrecv,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_sendrecv_api,linux,gpu;rocm,120,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_split_col_linear,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_split_embedding_none_divisible,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_split_row_linear,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_collective_wait,linux,gpu;rocm,300,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_eager_dist_api,linux,gpu;rocm,120,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_new_group_api,linux,gpu;rocm,120,DIST,test_runner.py,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
test_gen_nccl_id_op,,gpu;rocm;ASCEND;ASCEND_CL,,DIST,../dist_test.sh,2,1,http_proxy=;https_proxy=;PYTHONPATH=..,
...@@ -13,6 +13,10 @@ ...@@ -13,6 +13,10 @@
# limitations under the License. # limitations under the License.
import re import re
import os
import argparse
# port range (21200, 23000) is reserved for dist-ops
# function to process pythonpath env # function to process pythonpath env
...@@ -67,10 +71,10 @@ def process_conditions(conditions): ...@@ -67,10 +71,10 @@ def process_conditions(conditions):
Output: "" Output: ""
""" """
if len(conditions.strip()) == 0: if len(conditions.strip()) == 0:
conditions = "" conditions = []
else: else:
conditions = f" AND ({conditions})" conditions = conditions.strip().split(";")
return conditions return [c.strip() for c in conditions]
def proccess_archs(arch): def proccess_archs(arch):
...@@ -90,8 +94,8 @@ def proccess_archs(arch): ...@@ -90,8 +94,8 @@ def proccess_archs(arch):
arch = arch.upper().strip() arch = arch.upper().strip()
if len(arch) > 0: if len(arch) > 0:
for a in arch.split(";"): for a in arch.split(";"):
assert a in ["GPU", "ROCM", "ASCEND", "ASCEND_CL"], \ assert a in ["GPU", "ROCM", "ASCEND", "ASCEND_CL", "XPU"], \
f"""Supported arhc options are "GPU", "ROCM", "ASCEND" and "ASCEND_CL", but the options is {a}""" f"""Supported arhc options are "GPU", "ROCM", "ASCEND" and "ASCEND_CL", "XPU", but the options is {a}"""
archs += "WITH_" + a.upper() + " OR " archs += "WITH_" + a.upper() + " OR "
arch = "(" + archs[:-4] + ")" arch = "(" + archs[:-4] + ")"
else: else:
...@@ -135,6 +139,34 @@ def process_run_serial(run_serial): ...@@ -135,6 +139,34 @@ def process_run_serial(run_serial):
return rs return rs
def file_with_extension(prefix, suffixes):
    """
    Desc:
        Tell whether a regular file named `prefix + suffix` exists for at
        least one suffix in `suffixes`.
    Args:
        prefix: path of the candidate file without its extension.
        suffixes: iterable of extensions (including the dot) to try.
    Returns:
        True if any candidate is an existing regular file, otherwise False.
    """
    return any(os.path.isfile(prefix + suffix) for suffix in suffixes)
def process_name(name, curdir):
    """
    Desc:
        Check whether the test name has a legal format and whether the
        corresponding test file exists.
    Args:
        name: the raw test name read from the csv line; surrounding
            whitespace is stripped before validation.
        curdir: the directory in which the test file is looked up.
    Returns:
        The stripped test name.
    Raises:
        AssertionError: if the name is not "test_" followed only by
            characters of "0-9", "a-z", "A-Z" or "_", or if neither
            `<curdir>/<name>.py` nor `<curdir>/<name>.sh` exists.
    """
    name = name.strip()
    # fullmatch validates the WHOLE name; a prefix-only match would accept
    # names such as "test_foo-bar" that contain illegal characters.
    assert re.fullmatch("test_[0-9a-zA-Z_]+", name), \
        f"""If line is not the header of table, the test name must begin with "test_" """ \
        f"""and the following substring must include at least one char of "0-9", "a-z", "A-Z" or "_"."""
    filepath_prefix = os.path.join(curdir, name)
    suffix = [".py", ".sh"]
    assert file_with_extension(filepath_prefix, suffix), \
        f""" Please ensure the test file with the prefix '{filepath_prefix}' and one of the suffix {suffix} exists, because you specified a unittest named '{name}'"""
    return name
def process_run_type(run_type): def process_run_type(run_type):
rt = run_type.strip() rt = run_type.strip()
assert re.compile("^(NIGHTLY|EXCLUSIVE|CINN|DIST|GPUPS|INFER|EXCLUSIVE:NIGHTLY|DIST:NIGHTLY)$").search(rt), \ assert re.compile("^(NIGHTLY|EXCLUSIVE|CINN|DIST|GPUPS|INFER|EXCLUSIVE:NIGHTLY|DIST:NIGHTLY)$").search(rt), \
...@@ -143,7 +175,18 @@ def process_run_type(run_type): ...@@ -143,7 +175,18 @@ def process_run_type(run_type):
return rt return rt
def parse_line(line): DIST_UT_PORT = 21200
def process_dist_ut_port(port_num):
    """
    Desc:
        Reserve `port_num` consecutive ports from the module-level counter
        DIST_UT_PORT and return the first port of the reserved block.
    Args:
        port_num: number of ports to reserve (an int or a numeric string).
    Returns:
        The first port of the reserved block.
    Raises:
        AssertionError: if the reserved block would not fit below port 23000.
    """
    global DIST_UT_PORT
    port = DIST_UT_PORT
    count = int(port_num)
    # Validate the whole block [port, port + count), not just its first port,
    # and do it before advancing the counter so a failure leaves it untouched.
    assert port < 23000 and port + count <= 23000, "dist port is exahausted"
    DIST_UT_PORT = port + count
    return port
def parse_line(line, curdir):
""" """
Desc: Desc:
Input a line in csv file and output a string in cmake grammer, adding the specified test and setting its properties. Input a line in csv file and output a string in cmake grammer, adding the specified test and setting its properties.
...@@ -161,24 +204,14 @@ def parse_line(line): ...@@ -161,24 +204,14 @@ def parse_line(line):
endif()" endif()"
""" """
# A line contains name, os_, archs, timeout, run_type, launcher, dist_ut_port, run_serial, envs, conditions, etc.
# Following are descriptions of each variable:
#
# * `name`: the test's name
# * `os`: the supported operating systems, ignoring case. If the test runs on multiple operating systems, use ";" to separate them; for example, `apple;linux` means the test runs on both Apple and Linux. The supported values are `linux`, `win32` and `apple`. An empty value means the test runs on all operating systems.
# * `arch`: the device's architecture. Similar to `os`, multiple values are separated by ";" and case is ignored. The supported architectures are `gpu`, `xpu`, `npu` and `rocm`.
# * `timeout`: timeout of a unittest, in seconds.
# * `run_type`: run_type of a unittest. Supported values are `NIGHTLY`, `EXCLUSIVE`, `CINN`, `DIST`, `GPUPS`, `INFER`, `EXCLUSIVE:NIGHTLY`, `DIST:NIGHTLY`, which are case-insensitive.
# * `launcher`: the test launcher. Supported values are test_runner.py, dist_test.sh and custom scripts' names.
# * `dist_ut_port`: the starting port used in a distributed unit test
# * `run_serial`: whether to run in serial mode. The value can be 1 or 0. Default (empty) is 0.
# * `ENVS`: required environment variables. Multiple variables are separated by ";".
# * `conditions`: extra required conditions for some tests. The value is a boolean expression in CMake syntax.
name, os_, archs, timeout, run_type, launcher, dist_ut_port, run_serial, envs, conditions = line.strip( name, os_, archs, timeout, run_type, launcher, dist_ut_port, run_serial, envs, conditions = line.strip(
).split(",") ).split(",")
# name == "name" means the line being parsed is the header of the table
# we should skip this line and return empty here.
if name == "name": if name == "name":
return "" return ""
name = process_name(name, curdir)
envs = process_envs(envs) envs = process_envs(envs)
conditions = process_conditions(conditions) conditions = process_conditions(conditions)
...@@ -189,8 +222,13 @@ def parse_line(line): ...@@ -189,8 +222,13 @@ def parse_line(line):
cmd = "" cmd = ""
for c in conditions:
cmd += f"if ({c})\n"
time_out_str = f'TIMEOUT "{timeout}"' if len(timeout.strip()) > 0 else ''
if launcher[-3:] == ".sh": if launcher[-3:] == ".sh":
cmd += f'''if({archs} AND {os_} {conditions}) dist_ut_port = process_dist_ut_port(2)
cmd += f'''if({archs} AND {os_})
bash_test_modules( bash_test_modules(
{name} {name}
START_BASH START_BASH
...@@ -199,27 +237,48 @@ def parse_line(line): ...@@ -199,27 +237,48 @@ def parse_line(line):
"RUN_TYPE={run_type}" "RUN_TYPE={run_type}"
ENVS ENVS
"PADDLE_DIST_UT_PORT={dist_ut_port};{envs}") "PADDLE_DIST_UT_PORT={dist_ut_port};{envs}")
set_tests_properties({name} PROPERTIES TIMEOUT "{timeout}" RUN_SERIAL {run_serial}) set_tests_properties({name} PROPERTIES {time_out_str} RUN_SERIAL {run_serial})
endif() endif()
''' '''
else: else:
cmd += f'''if({archs} AND {os_} {conditions}) cmd += f'''if({archs} AND {os_})
py_test_modules( py_test_modules(
{name} {name}
MODULES MODULES
{name} {name}
ENVS ENVS
"PADDLE_DIST_UT_PORT={dist_ut_port};{envs}") "{envs}")
set_tests_properties({name} PROPERTIES TIMEOUT "{timeout}" RUN_SERIAL {run_serial}) set_tests_properties({name} PROPERTIES {time_out_str} RUN_SERIAL {run_serial})
endif() endif()
''' '''
for _ in conditions:
cmd += f"endif()\n"
return cmd return cmd
PROCESSED_DIR = set()
def gen_cmakelists(current_work_dir): def gen_cmakelists(current_work_dir):
print("procfessing dir:", current_work_dir) print("procfessing dir:", current_work_dir)
if current_work_dir == "": if current_work_dir == "":
current_work_dir = "." current_work_dir = "."
contents = os.listdir(current_work_dir)
sub_dirs = []
for c in contents:
c_path = os.path.join(current_work_dir, c)
if c_path in PROCESSED_DIR:
return
if os.path.isdir(c_path):
PROCESSED_DIR.add(c_path)
if os.path.isfile(os.path.join(current_work_dir, c, "testslist.csv")) \
or os.path.isfile(os.path.join(current_work_dir, c, "CMakeLists.txt")):
gen_cmakelists(os.path.join(current_work_dir, c))
sub_dirs.append(c)
if not os.path.isfile(os.path.join(current_work_dir, "testslist.csv")):
return
cmds = """# This file is generated by ${PADDLE_ROOT}/tools/gen_ut_cmakelists.py. cmds = """# This file is generated by ${PADDLE_ROOT}/tools/gen_ut_cmakelists.py.
# Please don't modify this file manually. # Please don't modify this file manually.
# If you need to change unittests in this file, please modify testslist.csv in the current directory # If you need to change unittests in this file, please modify testslist.csv in the current directory
...@@ -229,7 +288,7 @@ set(LOCAL_ALL_PLAT ON)\n""" ...@@ -229,7 +288,7 @@ set(LOCAL_ALL_PLAT ON)\n"""
with open(f"{current_work_dir}/testslist.csv") as csv_file: with open(f"{current_work_dir}/testslist.csv") as csv_file:
for i, line in enumerate(csv_file.readlines()): for i, line in enumerate(csv_file.readlines()):
try: try:
cmds += parse_line(line) cmds += parse_line(line, current_work_dir)
except Exception as e: except Exception as e:
print("===============PARSE LINE ERRORS OCCUR==========") print("===============PARSE LINE ERRORS OCCUR==========")
print(e) print(e)
...@@ -237,14 +296,14 @@ set(LOCAL_ALL_PLAT ON)\n""" ...@@ -237,14 +296,14 @@ set(LOCAL_ALL_PLAT ON)\n"""
print(f"[ERROR LINE {i+1}]: {line.strip()}") print(f"[ERROR LINE {i+1}]: {line.strip()}")
exit(1) exit(1)
for sub in sub_dirs:
cmds += f"add_subdirectory({sub})\n"
print(cmds, end="") print(cmds, end="")
with open(f"{current_work_dir}/CMakeLists.txt", "w") as cmake_file: with open(f"{current_work_dir}/CMakeLists.txt", "w") as cmake_file:
print(cmds, end="", file=cmake_file) print(cmds, end="", file=cmake_file)
if __name__ == "__main__": if __name__ == "__main__":
import os
import argparse
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument( parser.add_argument(
"--files", "--files",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册