From a3f9d6a38cc1587a7e331c33ffda0c0709406a66 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 5 Aug 2018 19:33:06 +0800 Subject: [PATCH] optimize profiler --- paddle/fluid/CMakeLists.txt | 2 +- paddle/fluid/framework/operator.cc | 9 ++++++--- paddle/fluid/operators/feed_op.cc | 1 - paddle/fluid/operators/fetch_barrier_op.cc | 6 ------ paddle/fluid/operators/fetch_op.cc | 3 --- paddle/fluid/operators/load_op.cc | 3 --- paddle/fluid/operators/recv_op.cc | 2 -- paddle/fluid/operators/send_barrier_op.cc | 5 ----- paddle/fluid/operators/send_op.cc | 3 --- 9 files changed, 7 insertions(+), 27 deletions(-) diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt index d274d96c2..e2e26fc5d 100644 --- a/paddle/fluid/CMakeLists.txt +++ b/paddle/fluid/CMakeLists.txt @@ -6,4 +6,4 @@ add_subdirectory(pybind) add_subdirectory(string) add_subdirectory(recordio) # NOTE: please add subdirectory inference at last. -add_subdirectory(inference) +#add_subdirectory(inference) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index cdac00739..18ec22680 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -136,6 +136,12 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { platform::SetDeviceId(dev_id); #endif } + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.Get(place); + + // For profiling, don't move out of this function because that will result + // in the failure of multi-GPU profiling. + platform::RecordEvent record_event(Type(), dev_ctx); RunImpl(scope, place); VLOG(10) << "+ " << DebugStringEx(&scope); } @@ -639,9 +645,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope, platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto* dev_ctx = pool.Get(place); - // For profiling, don't move out of this function because that will result - // in the failure of multi-GPU profiling. - platform::RecordEvent record_event(Type(), dev_ctx); // check if op[type] has kernel registered. auto& all_op_kernels = AllOpKernels(); auto kernels_iter = all_op_kernels.find(type_); diff --git a/paddle/fluid/operators/feed_op.cc b/paddle/fluid/operators/feed_op.cc index bcb3e63ed..dc7ef6649 100644 --- a/paddle/fluid/operators/feed_op.cc +++ b/paddle/fluid/operators/feed_op.cc @@ -31,7 +31,6 @@ class FeedOp : public framework::OperatorBase { const platform::Place &place) const override { // get device context from pool auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place); - platform::RecordEvent record_event(Type(), dev_ctx); auto feed_var_name = Input("X"); auto *feed_var = scope.FindVar(feed_var_name); diff --git a/paddle/fluid/operators/fetch_barrier_op.cc b/paddle/fluid/operators/fetch_barrier_op.cc index 680fde19e..d9cd956df 100644 --- a/paddle/fluid/operators/fetch_barrier_op.cc +++ b/paddle/fluid/operators/fetch_barrier_op.cc @@ -36,12 +36,6 @@ class FetchBarrierOp : public framework::OperatorBase { void RunImpl(const framework::Scope& scope, const platform::Place& place) const override { std::vector eps = Attr>("endpoints"); - - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - auto& ctx = *pool.Get(place); - // For profiling - platform::RecordEvent record_event(Type(), &ctx); - distributed::RPCClient* rpc_client = distributed::RPCClient::GetInstance(); diff --git a/paddle/fluid/operators/fetch_op.cc b/paddle/fluid/operators/fetch_op.cc index 1640a2a22..c197b45e8 100644 --- a/paddle/fluid/operators/fetch_op.cc +++ b/paddle/fluid/operators/fetch_op.cc @@ -30,9 +30,6 @@ class FetchOp : public framework::OperatorBase { private: void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - platform::RecordEvent record_event(Type(), pool.Get(place)); - auto fetch_var_name = Input("X"); auto *fetch_var = scope.FindVar(fetch_var_name); PADDLE_ENFORCE(fetch_var != nullptr, diff --git a/paddle/fluid/operators/load_op.cc b/paddle/fluid/operators/load_op.cc index ac35cf0b8..27e26cb1b 100644 --- a/paddle/fluid/operators/load_op.cc +++ b/paddle/fluid/operators/load_op.cc @@ -31,9 +31,6 @@ class LoadOp : public framework::OperatorBase { private: void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { - auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place); - platform::RecordEvent record_event(Type(), dev_ctx); - // FIXME(yuyang18): We save variable to local file now, but we should change // it to save an output stream. auto filename = Attr("file_path"); diff --git a/paddle/fluid/operators/recv_op.cc b/paddle/fluid/operators/recv_op.cc index 1ba684014..4a6ce938a 100644 --- a/paddle/fluid/operators/recv_op.cc +++ b/paddle/fluid/operators/recv_op.cc @@ -40,8 +40,6 @@ class RecvOp : public framework::OperatorBase { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto& ctx = *pool.Get(place); - // For profiling - platform::RecordEvent record_event(Type(), &ctx); distributed::RPCClient* rpc_client = distributed::RPCClient::GetInstance(); diff --git a/paddle/fluid/operators/send_barrier_op.cc b/paddle/fluid/operators/send_barrier_op.cc index d7f8e994a..1866a8604 100644 --- a/paddle/fluid/operators/send_barrier_op.cc +++ b/paddle/fluid/operators/send_barrier_op.cc @@ -39,11 +39,6 @@ class SendBarrierOp : public framework::OperatorBase { std::vector eps = Attr>("endpoints"); bool sync_mode = Attr("sync_mode"); - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - auto& ctx = *pool.Get(place); - // For profiling - platform::RecordEvent record_event(Type(), &ctx); - distributed::RPCClient* rpc_client = distributed::RPCClient::GetInstance(); diff --git a/paddle/fluid/operators/send_op.cc b/paddle/fluid/operators/send_op.cc index 829f310d4..3cd42f2d0 100644 --- a/paddle/fluid/operators/send_op.cc +++ b/paddle/fluid/operators/send_op.cc @@ -42,9 +42,6 @@ class SendOp : public framework::OperatorBase { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto& ctx = *pool.Get(place); - // For profiling - platform::RecordEvent record_event(Type(), &ctx); - distributed::RPCClient* rpc_client = distributed::RPCClient::GetInstance(); -- GitLab