diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc index 3da7e35559323b9041ece8838559b1d88ab7153f..b33162edd2b69ca0703f27041e71fe72da9779e3 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc @@ -20,6 +20,7 @@ #include "paddle/fluid/framework/details/fetch_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/ir/graph_helper.h" +#include "paddle/fluid/platform/profiler.h" namespace paddle { namespace framework { @@ -50,6 +51,8 @@ FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor( FeedFetchList FastThreadedSSAGraphExecutor::Run( const std::vector &fetch_tensors) { VLOG(3) << "enter FastThreadedSSAGraphExecutor Run"; + std::unique_ptr event( + new platform::RecordEvent("FastThreadedSSAGraphExecutorPrepare")); std::unique_ptr>> op_deps = atomic_op_deps_.get(); PrepareAtomicOpDeps(); @@ -64,7 +67,7 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run( InsertFetchOps(fetch_tensors, &fetches, &fetched_vars, op_deps.get(), &fetch_ops, &ready_fetch_ops); - + event.reset(nullptr); if (strategy_.num_threads_ == 1 && traced_ops_.size() == num_ops) { // If the num_threads is 1, we can record the order of operator's // execution in the first iteration, and in subsequent iterations, diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc index 247d78479348da998a46d7838b89c481c9e299e5..a0fb20a6478a5a378495b919ff6ea85dba168b1e 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc @@ -36,26 +36,10 @@ ScopeBufferedSSAGraphExecutor::ScopeBufferedSSAGraphExecutor( FeedFetchList ScopeBufferedSSAGraphExecutor::Run( const std::vector &fetch_tensors) { if (drop_scope_counter_ == 0) { - // Create local scopes. - for (auto it = local_scopes_.rbegin(); it != local_scopes_.rend(); ++it) { - auto &scope = *it; - Scope &local_scope = scope->NewScope(); - *scope->Var(details::kLocalExecScopeName)->GetMutable() = - &local_scope; - - for (auto &info : var_infos_) { - if (scope->FindVar(info.name_) != nullptr) { - continue; - } - - if (info.persistable_) { // Persistable - InitializeVariable(scope->Var(info.name_), info.type_); - } else { - InitializeVariable(local_scope.Var(info.name_), info.type_); - } - } - } + platform::RecordEvent e("InitLocalExeScopes"); + PrepareLocalExeScopes(); } + std::vector fetch_data; std::exception_ptr eptr = nullptr; try { @@ -64,9 +48,7 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( eptr = std::current_exception(); } - platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun"); ++drop_scope_counter_; - if (drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) { DropLocalExeScopes(); } @@ -78,11 +60,11 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( } void ScopeBufferedSSAGraphExecutor::DropLocalExeScopes() { + platform::RecordEvent drop_scope_event("DropLocalExeScopes"); drop_scope_counter_ = 0; for (auto p : places_) { platform::DeviceContextPool::Instance().Get(p)->Wait(); } - for (auto &scope : local_scopes_) { auto &local_scope = *scope->Var(details::kLocalExecScopeName)->GetMutable(); @@ -91,6 +73,26 @@ void ScopeBufferedSSAGraphExecutor::DropLocalExeScopes() { } } +void ScopeBufferedSSAGraphExecutor::PrepareLocalExeScopes() { + // Create local scopes. + for (auto it = local_scopes_.rbegin(); it != local_scopes_.rend(); ++it) { + auto &scope = *it; + Scope &local_scope = scope->NewScope(); + *scope->Var(kLocalExecScopeName)->GetMutable() = &local_scope; + + for (auto &info : var_infos_) { + if (scope->FindVar(info.name_) != nullptr) { + continue; + } + if (info.persistable_) { // Persistable + InitializeVariable(scope->Var(info.name_), info.type_); + } else { + InitializeVariable(local_scope.Var(info.name_), info.type_); + } + } + } +} + bool ScopeBufferedSSAGraphExecutor::NeedCreateLocalExeScope() { return drop_scope_counter_ == 0; } diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h index 030777cad894fa24ccdc0afa1aae8e7e4caa90ee..e0388be305f2285b941bc7193a8d97e52ce765c9 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h @@ -13,7 +13,8 @@ // limitations under the License. #pragma once - +#include +#include #include #include #include @@ -51,6 +52,8 @@ class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor { bool NeedCreateLocalExeScope(); + void PrepareLocalExeScopes(); + private: size_t drop_scope_counter_{0}; ExecutionStrategy strategy_; diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 0667748c2ac54e1c0c37487f17a9448a65268755..d6acaba6e2df33cbf5c7147747de6adf4f0ee96f 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -586,6 +586,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, platform::RecordBlock b(0); if (member_->HasGarbageCollectors()) { + platform::RecordEvent event("PrepareGarbageCollectors"); member_->ResetRuntimeReferenceCount(fetch_tensors, fetched_var_name); }