From 67c8dade586cf05c9c4addb7a313ff558305dc89 Mon Sep 17 00:00:00 2001 From: chengduo Date: Thu, 30 May 2019 21:00:57 +0800 Subject: [PATCH] Add Event in ScopeBuffer Executor (#17667) * add event for fast executor and add threads for scopebuffer executor test=develop --- .../fast_threaded_ssa_graph_executor.cc | 5 +- .../scope_buffered_ssa_graph_executor.cc | 46 ++++++++++--------- .../scope_buffered_ssa_graph_executor.h | 5 +- paddle/fluid/framework/parallel_executor.cc | 1 + 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc index 3da7e35559..b33162edd2 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc @@ -20,6 +20,7 @@ #include "paddle/fluid/framework/details/fetch_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/ir/graph_helper.h" +#include "paddle/fluid/platform/profiler.h" namespace paddle { namespace framework { @@ -50,6 +51,8 @@ FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor( FeedFetchList FastThreadedSSAGraphExecutor::Run( const std::vector &fetch_tensors) { VLOG(3) << "enter FastThreadedSSAGraphExecutor Run"; + std::unique_ptr event( + new platform::RecordEvent("FastThreadedSSAGraphExecutorPrepare")); std::unique_ptr>> op_deps = atomic_op_deps_.get(); PrepareAtomicOpDeps(); @@ -64,7 +67,7 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run( InsertFetchOps(fetch_tensors, &fetches, &fetched_vars, op_deps.get(), &fetch_ops, &ready_fetch_ops); - + event.reset(nullptr); if (strategy_.num_threads_ == 1 && traced_ops_.size() == num_ops) { // If the num_threads is 1, we can record the order of operator's // execution in the first iteration, and in subsequent iterations, diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc index 247d784793..a0fb20a647 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc @@ -36,26 +36,10 @@ ScopeBufferedSSAGraphExecutor::ScopeBufferedSSAGraphExecutor( FeedFetchList ScopeBufferedSSAGraphExecutor::Run( const std::vector &fetch_tensors) { if (drop_scope_counter_ == 0) { - // Create local scopes. - for (auto it = local_scopes_.rbegin(); it != local_scopes_.rend(); ++it) { - auto &scope = *it; - Scope &local_scope = scope->NewScope(); - *scope->Var(details::kLocalExecScopeName)->GetMutable() = - &local_scope; - - for (auto &info : var_infos_) { - if (scope->FindVar(info.name_) != nullptr) { - continue; - } - - if (info.persistable_) { // Persistable - InitializeVariable(scope->Var(info.name_), info.type_); - } else { - InitializeVariable(local_scope.Var(info.name_), info.type_); - } - } - } + platform::RecordEvent e("InitLocalExeScopes"); + PrepareLocalExeScopes(); } + std::vector fetch_data; std::exception_ptr eptr = nullptr; try { @@ -64,9 +48,7 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( eptr = std::current_exception(); } - platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun"); ++drop_scope_counter_; - if (drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) { DropLocalExeScopes(); } @@ -78,11 +60,11 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( } void ScopeBufferedSSAGraphExecutor::DropLocalExeScopes() { + platform::RecordEvent drop_scope_event("DropLocalExeScopes"); drop_scope_counter_ = 0; for (auto p : places_) { platform::DeviceContextPool::Instance().Get(p)->Wait(); } - for (auto &scope : local_scopes_) { auto &local_scope = *scope->Var(details::kLocalExecScopeName)->GetMutable(); @@ -91,6 +73,26 @@ void ScopeBufferedSSAGraphExecutor::DropLocalExeScopes() { } } +void ScopeBufferedSSAGraphExecutor::PrepareLocalExeScopes() { + // Create local scopes. + for (auto it = local_scopes_.rbegin(); it != local_scopes_.rend(); ++it) { + auto &scope = *it; + Scope &local_scope = scope->NewScope(); + *scope->Var(kLocalExecScopeName)->GetMutable() = &local_scope; + + for (auto &info : var_infos_) { + if (scope->FindVar(info.name_) != nullptr) { + continue; + } + if (info.persistable_) { // Persistable + InitializeVariable(scope->Var(info.name_), info.type_); + } else { + InitializeVariable(local_scope.Var(info.name_), info.type_); + } + } + } +} + bool ScopeBufferedSSAGraphExecutor::NeedCreateLocalExeScope() { return drop_scope_counter_ == 0; } diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h index 030777cad8..e0388be305 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h @@ -13,7 +13,8 @@ // limitations under the License. #pragma once - +#include +#include #include #include #include @@ -51,6 +52,8 @@ class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor { bool NeedCreateLocalExeScope(); + void PrepareLocalExeScopes(); + private: size_t drop_scope_counter_{0}; ExecutionStrategy strategy_; diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 0667748c2a..d6acaba6e2 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -586,6 +586,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, platform::RecordBlock b(0); if (member_->HasGarbageCollectors()) { + platform::RecordEvent event("PrepareGarbageCollectors"); member_->ResetRuntimeReferenceCount(fetch_tensors, fetched_var_name); } -- GitLab