From c1710e91b28c77f6f1895dc1f0d9c3057ed97080 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Fri, 18 Oct 2019 10:16:36 +0800 Subject: [PATCH] Disable GRPC_ARG_ALLOW_REUSEPORT to avoid potential problem. (#20690) --- paddle/fluid/operators/distributed/grpc/grpc_server.cc | 1 + python/paddle/fluid/tests/unittests/dist_test.sh | 6 +++++- python/paddle/fluid/tests/unittests/test_dist_base.py | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/distributed/grpc/grpc_server.cc b/paddle/fluid/operators/distributed/grpc/grpc_server.cc index adaa5dfd76b..cf0e6448352 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_server.cc +++ b/paddle/fluid/operators/distributed/grpc/grpc_server.cc @@ -466,6 +466,7 @@ void AsyncGRPCServer::StartServer() { if (FLAGS_rpc_disable_reuse_port) { builder.SetOption( std::unique_ptr<::grpc::ServerBuilderOption>(new NoReusePortOption)); + LOG(INFO) << "set FLAGS_rpc_disable_reuse_port"; } builder.RegisterService(service.get()); diff --git a/python/paddle/fluid/tests/unittests/dist_test.sh b/python/paddle/fluid/tests/unittests/dist_test.sh index f8d464598ce..79eb8b4f26c 100644 --- a/python/paddle/fluid/tests/unittests/dist_test.sh +++ b/python/paddle/fluid/tests/unittests/dist_test.sh @@ -1,5 +1,6 @@ #!/bin/bash unset https_proxy http_proxy +export FLAGS_rpc_disable_reuse_port=1 name=${TEST_TARGET_NAME} TEST_TIMEOUT=${TEST_TIMEOUT} @@ -14,6 +15,7 @@ if [[ ${TEST_TIMEOUT}"x" == "x" ]]; then exit 1 fi + # rm flag file rm -f ${name}_*.log @@ -28,7 +30,9 @@ fi echo "${name} faild with ${exit_code}" -netstat -an +echo "after run ${name}" +ps -aux +netstat -anlp # paddle log echo "${name} log" diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index c0febf88a4d..27cef9d7a56 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -848,6 +848,7 @@ class
TestDistBase(unittest.TestCase): "FLAGS_rpc_deadline": "30000", # 5sec to fail fast "FLAGS_rpc_retry_bind_port": "50", "FLAGS_cudnn_deterministic": "1", + "FLAGS_rpc_disable_reuse_port": "1", "http_proxy": "", "NCCL_P2P_DISABLE": "1", "NCCL_SHM_DISABLE": "1" @@ -859,7 +860,7 @@ class TestDistBase(unittest.TestCase): required_envs["GLOG_vmodule"] = \ "fused_all_reduce_op_handle=10,all_reduce_op_handle=10,alloc_continuous_space_op=10,fuse_all_reduce_op_pass=10," \ "alloc_continuous_space_for_grad_pass=10,fast_threaded_ssa_graph_executor=10,executor=10,operator=10," \ - "sparse_all_reduce_op_handle=10" + "sparse_all_reduce_op_handle=10,gen_nccl_id_op=10" required_envs["GLOG_logtostderr"] = "1" local_losses \ -- GitLab