diff --git a/paddle/fluid/operators/distributed/grpc/grpc_server.cc b/paddle/fluid/operators/distributed/grpc/grpc_server.cc index adaa5dfd76b341fc677f0611b4d11924f54a266c..cf0e6448352a2c926e9dda0c2ea90e03a2368430 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_server.cc +++ b/paddle/fluid/operators/distributed/grpc/grpc_server.cc @@ -466,6 +466,7 @@ void AsyncGRPCServer::StartServer() { if (FLAGS_rpc_disable_reuse_port) { builder.SetOption( std::unique_ptr<::grpc::ServerBuilderOption>(new NoReusePortOption)); + LOG(INFO) << "set FLAGS_rpc_disable_reuse_port"; } builder.RegisterService(service.get()); diff --git a/python/paddle/fluid/tests/unittests/dist_test.sh b/python/paddle/fluid/tests/unittests/dist_test.sh index f8d464598ce3d6e0e091a2428fedb41adfbc19d6..79eb8b4f26c8c606197aae6b583b1f26e53c36ea 100644 --- a/python/paddle/fluid/tests/unittests/dist_test.sh +++ b/python/paddle/fluid/tests/unittests/dist_test.sh @@ -1,5 +1,6 @@ #!/bin/bash unset https_proxy http_proxy +export FLAGS_rpc_disable_reuse_port=1 name=${TEST_TARGET_NAME} TEST_TIMEOUT=${TEST_TIMEOUT} @@ -14,6 +15,7 @@ if [[ ${TEST_TIMEOUT}"x" == "x" ]]; then exit 1 fi + # rm flag file rm -f ${name}_*.log @@ -28,7 +30,9 @@ fi echo "${name} faild with ${exit_code}" -netstat -an +echo "after run ${name}" +ps -aux +netstat -anlp # paddle log echo "${name} log" diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index c0febf88a4dcc61730556fdbe9f466370efb7562..27cef9d7a561e4b1f0b84eb09b3f07dfd710e842 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -848,6 +848,7 @@ class TestDistBase(unittest.TestCase): "FLAGS_rpc_deadline": "30000", # 5sec to fail fast "FLAGS_rpc_retry_bind_port": "50", "FLAGS_cudnn_deterministic": "1", + "FLAGS_rpc_disable_reuse_port": "1", "http_proxy": "", "NCCL_P2P_DISABLE": "1", "NCCL_SHM_DISABLE": "1" @@ -859,7 +860,7 @@ class TestDistBase(unittest.TestCase): required_envs["GLOG_vmodule"] = \ "fused_all_reduce_op_handle=10,all_reduce_op_handle=10,alloc_continuous_space_op=10,fuse_all_reduce_op_pass=10," \ "alloc_continuous_space_for_grad_pass=10,fast_threaded_ssa_graph_executor=10,executor=10,operator=10," \ - "sparse_all_reduce_op_handle=10" + "sparse_all_reduce_op_handle=10,gen_nccl_id_op=10" required_envs["GLOG_logtostderr"] = "1" local_losses \