From ab0c2e1dabac18270d6049cd7c74bde530e50802 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 26 Jun 2018 00:16:05 -0500 Subject: [PATCH] Fix rpc_deadline (#11709) --- paddle/fluid/operators/distributed/brpc_client.h | 16 +++++++--------- paddle/fluid/operators/distributed/grpc_client.h | 12 ++++++------ paddle/fluid/operators/distributed/rpc_client.cc | 2 +- paddle/fluid/operators/distributed/rpc_client.h | 16 ++++++++-------- 4 files changed, 22 insertions(+), 24 deletions(-) diff --git a/paddle/fluid/operators/distributed/brpc_client.h b/paddle/fluid/operators/distributed/brpc_client.h index 34f140687f9..8ff1f0a6076 100644 --- a/paddle/fluid/operators/distributed/brpc_client.h +++ b/paddle/fluid/operators/distributed/brpc_client.h @@ -55,26 +55,24 @@ class BRPCClient : public RPCClient { bool AsyncSendVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& var_name, - int64_t time_out = RPCClient::rpc_time_out) override; + int64_t time_out = FLAGS_rpc_deadline) override; bool AsyncGetVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& var_name, - int64_t time_out = RPCClient::rpc_time_out) override; + int64_t time_out = FLAGS_rpc_deadline) override; bool AsyncPrefetchVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& in_var_name, const std::string& out_var_name, - int64_t time_out = RPCClient::rpc_time_out) override; + int64_t time_out = FLAGS_rpc_deadline) override; - void AsyncSendBatchBarrier( - const std::string& ep, - int64_t time_out = RPCClient::rpc_time_out) override; + void AsyncSendBatchBarrier(const std::string& ep, + int64_t time_out = FLAGS_rpc_deadline) override; - void AsyncSendFetchBarrier( - const std::string& ep, - int64_t time_out = RPCClient::rpc_time_out) override; + void AsyncSendFetchBarrier(const std::string& ep, + int64_t time_out = FLAGS_rpc_deadline) override; void Wait() override; diff --git a/paddle/fluid/operators/distributed/grpc_client.h b/paddle/fluid/operators/distributed/grpc_client.h index 5b1531d7ad1..940e9d879b3 100644 --- a/paddle/fluid/operators/distributed/grpc_client.h +++ b/paddle/fluid/operators/distributed/grpc_client.h @@ -178,24 +178,24 @@ class GRPCClient : public RPCClient { bool AsyncSendVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& var_name, - int64_t time_out = FLAGS_grpc_deadline) override; + int64_t time_out = FLAGS_rpc_deadline) override; bool AsyncGetVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& var_name, - int64_t time_out = FLAGS_grpc_deadline) override; + int64_t time_out = FLAGS_rpc_deadline) override; bool AsyncPrefetchVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& in_var_name, const std::string& out_var_name, - int64_t time_out = FLAGS_grpc_deadline) override; + int64_t time_out = FLAGS_rpc_deadline) override; void AsyncSendBatchBarrier(const std::string& ep, - int64_t time_out = FLAGS_grpc_deadline) override; + int64_t time_out = FLAGS_rpc_deadline) override; void AsyncSendFetchBarrier(const std::string& ep, - int64_t time_out = FLAGS_grpc_deadline) override; + int64_t time_out = FLAGS_rpc_deadline) override; void Wait() override; @@ -211,7 +211,7 @@ class GRPCClient : public RPCClient { void Proceed(); void AsyncSendComplete(const std::string& ep, - int64_t time_out = FLAGS_grpc_deadline); + int64_t time_out = FLAGS_rpc_deadline); std::shared_ptr GetChannel(const std::string& ep); diff --git a/paddle/fluid/operators/distributed/rpc_client.cc b/paddle/fluid/operators/distributed/rpc_client.cc index 2cf87faaab3..b5ec9fe5367 100644 --- a/paddle/fluid/operators/distributed/rpc_client.cc +++ b/paddle/fluid/operators/distributed/rpc_client.cc @@ -16,7 +16,7 @@ #include "gflags/gflags.h" // default to 3min to avoid temprary network failures. -DEFINE_int32(grpc_deadline, 180000, "deadline timeouts for grpc"); +DEFINE_int32(rpc_deadline, 180000, "deadline timeouts for rpc"); namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/distributed/rpc_client.h b/paddle/fluid/operators/distributed/rpc_client.h index db437a7f1ec..151b60d6f03 100644 --- a/paddle/fluid/operators/distributed/rpc_client.h +++ b/paddle/fluid/operators/distributed/rpc_client.h @@ -21,7 +21,7 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" -DECLARE_int32(grpc_deadline); +DECLARE_int32(rpc_deadline); namespace paddle { namespace operators { @@ -35,26 +35,26 @@ class RPCClient { const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& var_name, - int64_t time_out = FLAGS_grpc_deadline) = 0; + int64_t time_out = FLAGS_rpc_deadline) = 0; virtual bool AsyncGetVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& var_name, - int64_t time_out = FLAGS_grpc_deadline) = 0; + int64_t time_out = FLAGS_rpc_deadline) = 0; virtual bool AsyncPrefetchVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& in_var_name, const std::string& out_var_name, - int64_t time_out = FLAGS_grpc_deadline) = 0; + int64_t time_out = FLAGS_rpc_deadline) = 0; - virtual void AsyncSendBatchBarrier( - const std::string& ep, int64_t time_out = FLAGS_grpc_deadline) = 0; + virtual void AsyncSendBatchBarrier(const std::string& ep, + int64_t time_out = FLAGS_rpc_deadline) = 0; - virtual void AsyncSendFetchBarrier( - const std::string& ep, int64_t time_out = FLAGS_grpc_deadline) = 0; + virtual void AsyncSendFetchBarrier(const std::string& ep, + int64_t time_out = FLAGS_rpc_deadline) = 0; // SendComplete tells all the server that current trainer have no more data // to train, so that the pserver can reduce it's barrier count, and continue -- GitLab