diff --git a/paddle/fluid/operators/distributed/grpc_variable_response.cc b/paddle/fluid/operators/distributed/grpc_variable_response.cc index 9e54aafb2d2ecb38cfcd9e1cc5242a56fe4ddc8b..d6d219d4369ba785e5c369538d4a18dc682952c1 100644 --- a/paddle/fluid/operators/distributed/grpc_variable_response.cc +++ b/paddle/fluid/operators/distributed/grpc_variable_response.cc @@ -286,10 +286,10 @@ int GRPCVariableResponse::Parse(Source* source) { platform::EnableProfiler(platform::ProfilerState::kCPU); } else if (profiling == platform::kDisableProfiler && platform::IsProfileEnabled()) { - // TODO(panyx0718): Should we allow to customize file dir. platform::DisableProfiler( platform::EventSortingKey::kDefault, - string::Sprintf("/tmp/profile_ps_%lld", listener_id)); + string::Sprintf("%s_%lld", FLAGS_rpc_server_profile_path, + listener_id)); } break; } diff --git a/paddle/fluid/operators/distributed/request_handler_impl.cc b/paddle/fluid/operators/distributed/request_handler_impl.cc index 40143887e510532cb8b0fb4a82aae3cbf7bfd320..025528fe70b8f4d353ab92f29b1bd71c77cf7850 100644 --- a/paddle/fluid/operators/distributed/request_handler_impl.cc +++ b/paddle/fluid/operators/distributed/request_handler_impl.cc @@ -51,7 +51,6 @@ bool RequestSendHandler::Handle(const std::string& varname, // Async if (!sync_mode_) { VLOG(3) << "async process var: " << varname; - rpc_server_->Profiler().OneStep(); try { executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(), scope); diff --git a/paddle/fluid/operators/distributed/rpc_server.cc b/paddle/fluid/operators/distributed/rpc_server.cc index 084480ae48b8b9267ade1a840f6a70519cb28e48..3e30ed4ac86bd2cb3f7c4301163e54a947c3d5b4 100644 --- a/paddle/fluid/operators/distributed/rpc_server.cc +++ b/paddle/fluid/operators/distributed/rpc_server.cc @@ -20,42 +20,10 @@ #include "paddle/fluid/operators/distributed/rpc_server.h" #include "paddle/fluid/platform/profiler.h" -DEFINE_int32(rpc_server_profile_period, 0, - "the period of listen_and_serv to do profile"); -DEFINE_string(rpc_server_profile_path, "/dev/null", - "the profile log file path"); - namespace paddle { namespace operators { namespace distributed { -RPCServerProfiler::RPCServerProfiler(int profile_period, - const std::string& profile_log_path) - : profile_period_(profile_period), profile_log_path_(profile_log_path) { - step_ = 0; -} - -void RPCServerProfiler::OneStep() { - PADDLE_ENFORCE_LE(step_, profile_period_, - "step_ should not be larger then " - "profile_period_"); - if (profile_period_ <= 0) { - return; - } - - if (step_ == 0) { - auto pf_state = paddle::platform::ProfilerState::kCPU; - paddle::platform::EnableProfiler(pf_state); - } - if (step_ == profile_period_) { - paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kTotal, - profile_log_path_); - step_ = 0; - } else { - step_++; - } -} - void RPCServer::ShutDown() { LOG(INFO) << "RPCServer ShutDown "; ShutDownImpl(); diff --git a/paddle/fluid/operators/distributed/rpc_server.h b/paddle/fluid/operators/distributed/rpc_server.h index f3e61e1575ced0b9ffbad23e6973121daca9751b..c78c5007a7f262f15305b6c284e8c4fbddef42a0 100644 --- a/paddle/fluid/operators/distributed/rpc_server.h +++ b/paddle/fluid/operators/distributed/rpc_server.h @@ -23,30 +23,14 @@ #include "paddle/fluid/operators/distributed/request_handler.h" -DECLARE_int32(rpc_server_profile_period); -DECLARE_string(rpc_server_profile_path); - namespace paddle { namespace operators { namespace distributed { -class RPCServerProfiler { - public: - RPCServerProfiler(int profile_period, const std::string& profile_log_path); - void OneStep(); - - private: - const int profile_period_; - std::string profile_log_path_; - int step_; -}; - class RPCServer { public: explicit RPCServer(const std::string& address, int client_num) : cur_cond_(0), - profiler_(FLAGS_rpc_server_profile_period, - FLAGS_rpc_server_profile_path), bind_address_(address), exit_flag_(false), selected_port_(0), @@ -86,7 +70,6 @@ class RPCServer { void Complete(); void ResetBarrierCounter(); - RPCServerProfiler& Profiler() { return profiler_; } bool NeedResetAllVars(); @@ -101,7 +84,6 @@ class RPCServer { std::unordered_map rpc_cond_map_; std::atomic cur_cond_; std::condition_variable rpc_cond_; - RPCServerProfiler profiler_; protected: std::string bind_address_; diff --git a/paddle/fluid/operators/distributed/variable_response.cc b/paddle/fluid/operators/distributed/variable_response.cc index c4854d50b6371064003a10e18efc9e5f160d9a42..b2f73b67dc9bf944892187abd2e5709e54449d7d 100644 --- a/paddle/fluid/operators/distributed/variable_response.cc +++ b/paddle/fluid/operators/distributed/variable_response.cc @@ -16,6 +16,9 @@ #include #include "paddle/fluid/operators/distributed/sendrecvop_utils.h" +DEFINE_string(rpc_server_profile_path, "./profile_ps", + "the profile log file path"); + namespace paddle { namespace operators { namespace distributed { diff --git a/paddle/fluid/operators/distributed/variable_response.h b/paddle/fluid/operators/distributed/variable_response.h index f20a6038cefc28ff0569e2523cf77ddd172aa4e8..4c7fcbbdfb305ce6b4fc9d1edd9738899b200ec6 100644 --- a/paddle/fluid/operators/distributed/variable_response.h +++ b/paddle/fluid/operators/distributed/variable_response.h @@ -27,6 +27,8 @@ #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/distributed/send_recv.pb.h" +DECLARE_string(rpc_server_profile_path); + namespace paddle { namespace operators { namespace distributed { diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index 865799589c4a5525f845516022171a9117fa0bac..1d8b1411cddf4fe16d2d00313c519cc173e1504d 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -134,7 +134,6 @@ void ListenAndServOp::RunSyncLoop( rpc_service_->ResetBarrierCounter(); while (true) { - rpc_service_->Profiler().OneStep(); // Get from multiple trainers, we don't care about the order in which // the gradients arrives, just add suffix 0~n and merge the gradient. rpc_service_->SetCond(distributed::kRequestSend); diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index da46a1abe12258b47b2fd4afb5f146daf15e026d..56bf9e31a35fdec5b7f04849068ff96ac9776c0e 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -226,7 +226,7 @@ RecordBlock::~RecordBlock() { void EnableProfiler(ProfilerState state) { PADDLE_ENFORCE(state != ProfilerState::kDisabled, - "Can't enbale profling, since the input state is ", + "Can't enable profiling, since the input state is ", "ProfilerState::kDisabled"); std::lock_guard l(profiler_mu); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 737c8be8147a7efaf9b89827f063430146d3c078..c4cfd8e4680a3564b099eb4d8e3587e45f907572 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -118,7 +118,6 @@ def __bootstrap__(): ] if core.is_compiled_with_dist(): read_env_flags.append('rpc_deadline') - read_env_flags.append('rpc_server_profile_period') read_env_flags.append('rpc_server_profile_path') read_env_flags.append('enable_rpc_profiler') read_env_flags.append('rpc_send_thread_num')