From 9e4e9e9b6e21bbfdfa9b441badde28908ed36a0d Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Tue, 6 Nov 2018 10:17:08 +0800 Subject: [PATCH] clean rpc server profiler --- .../distributed/grpc_variable_response.cc | 6 +++- .../distributed/request_handler_impl.cc | 1 - .../fluid/operators/distributed/rpc_server.cc | 32 ------------------- .../fluid/operators/distributed/rpc_server.h | 16 ---------- paddle/fluid/operators/listen_and_serv_op.cc | 1 - paddle/fluid/platform/profiler.cc | 2 +- python/paddle/fluid/__init__.py | 1 - 7 files changed, 6 insertions(+), 53 deletions(-) diff --git a/paddle/fluid/operators/distributed/grpc_variable_response.cc b/paddle/fluid/operators/distributed/grpc_variable_response.cc index 34d47f3ec0f..eda4c45d3b9 100644 --- a/paddle/fluid/operators/distributed/grpc_variable_response.cc +++ b/paddle/fluid/operators/distributed/grpc_variable_response.cc @@ -22,6 +22,9 @@ #include "paddle/fluid/operators/distributed/grpc_variable_response.h" #include "paddle/fluid/platform/profiler.h" +DEFINE_string(rpc_server_profile_path, "/tmp/profile_ps", + "the profile log file path"); + namespace paddle { namespace operators { namespace distributed { @@ -289,7 +292,8 @@ int GRPCVariableResponse::Parse(Source* source) { // TODO(panyx0718): Should we allow to customize file dir. platform::DisableProfiler( platform::EventSortingKey::kDefault, - string::Sprintf("/tmp/profile_ps_%lld", listener_id)); + string::Sprintf("%s_%lld", FLAGS_rpc_server_profile_path, + listener_id)); } break; } diff --git a/paddle/fluid/operators/distributed/request_handler_impl.cc b/paddle/fluid/operators/distributed/request_handler_impl.cc index 849e412504e..a89ae59666d 100644 --- a/paddle/fluid/operators/distributed/request_handler_impl.cc +++ b/paddle/fluid/operators/distributed/request_handler_impl.cc @@ -50,7 +50,6 @@ bool RequestSendHandler::Handle(const std::string& varname, // Async if (!sync_mode_) { VLOG(3) << "async process var: " << varname; - rpc_server_->Profiler().OneStep(); try { executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(), scope); diff --git a/paddle/fluid/operators/distributed/rpc_server.cc b/paddle/fluid/operators/distributed/rpc_server.cc index 084480ae48b..3e30ed4ac86 100644 --- a/paddle/fluid/operators/distributed/rpc_server.cc +++ b/paddle/fluid/operators/distributed/rpc_server.cc @@ -20,42 +20,10 @@ #include "paddle/fluid/operators/distributed/rpc_server.h" #include "paddle/fluid/platform/profiler.h" -DEFINE_int32(rpc_server_profile_period, 0, - "the period of listen_and_serv to do profile"); -DEFINE_string(rpc_server_profile_path, "/dev/null", - "the profile log file path"); - namespace paddle { namespace operators { namespace distributed { -RPCServerProfiler::RPCServerProfiler(int profile_period, - const std::string& profile_log_path) - : profile_period_(profile_period), profile_log_path_(profile_log_path) { - step_ = 0; -} - -void RPCServerProfiler::OneStep() { - PADDLE_ENFORCE_LE(step_, profile_period_, - "step_ should not be larger then " - "profile_period_"); - if (profile_period_ <= 0) { - return; - } - - if (step_ == 0) { - auto pf_state = paddle::platform::ProfilerState::kCPU; - paddle::platform::EnableProfiler(pf_state); - } - if (step_ == profile_period_) { - paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kTotal, - profile_log_path_); - step_ = 0; - } else { - step_++; - } -} - void RPCServer::ShutDown() { LOG(INFO) << "RPCServer ShutDown "; ShutDownImpl(); diff --git a/paddle/fluid/operators/distributed/rpc_server.h b/paddle/fluid/operators/distributed/rpc_server.h index f3e61e1575c..c6934f8ace5 100644 --- a/paddle/fluid/operators/distributed/rpc_server.h +++ b/paddle/fluid/operators/distributed/rpc_server.h @@ -23,30 +23,16 @@ #include "paddle/fluid/operators/distributed/request_handler.h" -DECLARE_int32(rpc_server_profile_period); DECLARE_string(rpc_server_profile_path); namespace paddle { namespace operators { namespace distributed { -class RPCServerProfiler { - public: - RPCServerProfiler(int profile_period, const std::string& profile_log_path); - void OneStep(); - - private: - const int profile_period_; - std::string profile_log_path_; - int step_; -}; - class RPCServer { public: explicit RPCServer(const std::string& address, int client_num) : cur_cond_(0), - profiler_(FLAGS_rpc_server_profile_period, - FLAGS_rpc_server_profile_path), bind_address_(address), exit_flag_(false), selected_port_(0), @@ -86,7 +72,6 @@ class RPCServer { void Complete(); void ResetBarrierCounter(); - RPCServerProfiler& Profiler() { return profiler_; } bool NeedResetAllVars(); @@ -101,7 +86,6 @@ class RPCServer { std::unordered_map rpc_cond_map_; std::atomic cur_cond_; std::condition_variable rpc_cond_; - RPCServerProfiler profiler_; protected: std::string bind_address_; diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index a038bad701b..7e8a0225c67 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -134,7 +134,6 @@ void ListenAndServOp::RunSyncLoop( rpc_service_->ResetBarrierCounter(); while (true) { - rpc_service_->Profiler().OneStep(); // Get from multiple trainers, we don't care about the order in which // the gradients arrives, just add suffix 0~n and merge the gradient. rpc_service_->SetCond(distributed::kRequestSend); diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index da46a1abe12..56bf9e31a35 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -226,7 +226,7 @@ RecordBlock::~RecordBlock() { void EnableProfiler(ProfilerState state) { PADDLE_ENFORCE(state != ProfilerState::kDisabled, - "Can't enbale profling, since the input state is ", + "Can't enable profiling, since the input state is ", "ProfilerState::kDisabled"); std::lock_guard l(profiler_mu); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 737c8be8147..c4cfd8e4680 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -118,7 +118,6 @@ def __bootstrap__(): ] if core.is_compiled_with_dist(): read_env_flags.append('rpc_deadline') - read_env_flags.append('rpc_server_profile_period') read_env_flags.append('rpc_server_profile_path') read_env_flags.append('enable_rpc_profiler') read_env_flags.append('rpc_send_thread_num') -- GitLab