diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
index 7606f2bc06b2ecf07c5649eeae1a2d5587a8880c..a9b89614aea3ec5c233827762e3deed3e9fdd3b1 100644
--- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
@@ -22,7 +22,8 @@ namespace framework {
 namespace details {
 
 FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor(
-    const ExecutionStrategy &strategy, const std::vector<Scope *> &local_scopes,
+    const ExecutionStrategy &strategy,
+    const std::vector<std::shared_ptr<Scope>> &local_scopes,
     const std::vector<platform::Place> &places,
     std::unique_ptr<ir::Graph> &&graph)
     : strategy_(strategy),
diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
index dad3a231cba6402f57ba654a9ac5fb520b9c8f04..fb615d70b7a3407963cb7db7c696c9ed0330d036 100644
--- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
@@ -29,16 +29,17 @@ namespace details {
 class OpHandleBase;
 
 class FastThreadedSSAGraphExecutor : public SSAGraphExecutor {
  public:
-  FastThreadedSSAGraphExecutor(const ExecutionStrategy &strategy,
-                               const std::vector<Scope *> &local_scopes,
-                               const std::vector<platform::Place> &places,
-                               std::unique_ptr<ir::Graph> &&graph);
+  FastThreadedSSAGraphExecutor(
+      const ExecutionStrategy &strategy,
+      const std::vector<std::shared_ptr<Scope>> &local_scopes,
+      const std::vector<platform::Place> &places,
+      std::unique_ptr<ir::Graph> &&graph);
   FeedFetchList Run(const std::vector<std::string> &fetch_tensors) override;
   const ir::Graph &Graph() const override;
 
  private:
   ExecutionStrategy strategy_;
-  std::vector<Scope *> local_scopes_;
+  std::vector<std::shared_ptr<Scope>> local_scopes_;
   std::vector<platform::Place> places_;
   std::unique_ptr<ir::Graph> graph_;
diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc
index fe18b2060c5cd7e157374da53c5a985f70545ab7..2f4aefd39ddcc1c3c6903de64c525e934fc68e52 100644
--- a/paddle/fluid/framework/details/fetch_op_handle.cc
+++ b/paddle/fluid/framework/details/fetch_op_handle.cc
@@ -22,7 +22,7 @@ namespace framework {
 namespace details {
 
 FetchOpHandle::FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset,
-                             std::vector<Scope *> *local_scopes)
+                             std::vector<std::shared_ptr<Scope>> *local_scopes)
     : OpHandleBase(node),
       data_(data),
       offset_(offset),
diff --git a/paddle/fluid/framework/details/fetch_op_handle.h b/paddle/fluid/framework/details/fetch_op_handle.h
index 6ce42f92d7f1e81eeafd1eb5c28ce3564a5ffebc..a207e36b8ac046acb1da7e3798de7d1b6ff718ff 100644
--- a/paddle/fluid/framework/details/fetch_op_handle.h
+++ b/paddle/fluid/framework/details/fetch_op_handle.h
@@ -29,7 +29,7 @@ namespace details {
 struct FetchOpHandle : public OpHandleBase {
  public:
   FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset,
-                std::vector<Scope *> *local_scopes);
+                std::vector<std::shared_ptr<Scope>> *local_scopes);
 
   ~FetchOpHandle();
 
@@ -47,7 +47,7 @@ struct FetchOpHandle : public OpHandleBase {
  private:
   FeedFetchList *data_;
   size_t offset_;
-  std::vector<Scope *> *local_scopes_;
+  std::vector<std::shared_ptr<Scope>> *local_scopes_;
   std::vector<LoDTensor> tensors_;
 };
 
diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
index 5bd974d6b789a2f085c0a69de5e133187342f587..bf5671c679722103f9bbea4360bcbc4b2ed917d2 100644
--- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
@@ -23,7 +23,8 @@ namespace paddle {
 namespace framework {
 namespace details {
 ScopeBufferedSSAGraphExecutor::ScopeBufferedSSAGraphExecutor(
-    ExecutionStrategy strategy, std::vector<Scope *> local_scopes,
+    ExecutionStrategy strategy,
+    std::vector<std::shared_ptr<Scope>> local_scopes,
     std::vector<VariableInfo> var_infos, std::vector<platform::Place> places,
     std::unique_ptr<SSAGraphExecutor> &&underlying_executor)
     : strategy_(std::move(strategy)),
diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h
index 5e87e0bf50b51d2b630aba06a5907dd721754d1f..ec31755af5d6db7b91136e2d188ed5ae6134f5e0 100644
--- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h
@@ -37,7 +37,8 @@ struct VariableInfo {
 class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor {
  public:
   ScopeBufferedSSAGraphExecutor(
-      ExecutionStrategy strategy, std::vector<Scope *> local_scopes,
+      ExecutionStrategy strategy,
+      std::vector<std::shared_ptr<Scope>> local_scopes,
       std::vector<VariableInfo> var_infos, std::vector<platform::Place> places,
       std::unique_ptr<SSAGraphExecutor>&& underlying_executor);
 
@@ -52,7 +53,7 @@ class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor {
   ExecutionStrategy strategy_;
   std::unique_ptr<SSAGraphExecutor> underlying_executor_;
 
-  std::vector<Scope *> local_scopes_;
+  std::vector<std::shared_ptr<Scope>> local_scopes_;
   std::vector<VariableInfo> var_infos_;
   std::vector<platform::Place> places_;
 };
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
index c9e331ef359f853263f8dad38dd0a2be4d9618ad..cc6f44436309ad3249960aa64e031e604a103344 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
@@ -21,7 +21,8 @@ namespace paddle {
 namespace framework {
 namespace details {
 ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor(
-    const ExecutionStrategy &strategy, const std::vector<Scope *> &local_scopes,
+    const ExecutionStrategy &strategy,
+    const std::vector<std::shared_ptr<Scope>> &local_scopes,
     const std::vector<platform::Place> &places,
     std::unique_ptr<ir::Graph> &&graph)
     : graph_(std::move(graph)),
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
index 9135c1f5d435d5e2c60eb90c80803361aa31a3c4..2a74af6c3d0a62b0a7f60a58babbd6263a2bff70 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
@@ -38,10 +38,11 @@ namespace details {
 
 class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
  public:
-  ThreadedSSAGraphExecutor(const ExecutionStrategy &strategy,
-                           const std::vector<Scope *> &local_scopes,
-                           const std::vector<platform::Place> &places,
-                           std::unique_ptr<ir::Graph> &&graph);
+  ThreadedSSAGraphExecutor(
+      const ExecutionStrategy &strategy,
+      const std::vector<std::shared_ptr<Scope>> &local_scopes,
+      const std::vector<platform::Place> &places,
+      std::unique_ptr<ir::Graph> &&graph);
 
   const ir::Graph &Graph() const override { return *graph_; }
   // Run a SSAGraph by a thread pool
@@ -57,7 +58,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
  private:
   std::unique_ptr<ir::Graph> graph_;
   std::unique_ptr<::ThreadPool> pool_;
-  std::vector<Scope *> local_scopes_;
+  std::vector<std::shared_ptr<Scope>> local_scopes_;
   std::vector<platform::Place> places_;
   platform::DeviceContextPool fetch_ctxs_;
   ExceptionHolder exception_holder_;
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index 81cb24bdda6b87a3d708cf5047dce05d5020a0d5..93c74deb3e96cad97121a23fe244c76593f96243 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -39,7 +39,8 @@ std::unique_ptr<ir::Graph> ApplyParallelExecutorPass(
     const ProgramDesc &main_program, const std::vector<platform::Place> &places,
     const std::string &loss_var_name,
     const std::unordered_set<std::string> &param_names,
-    const std::vector<Scope *> &local_scopes, const bool use_cuda,
+    const std::vector<std::shared_ptr<Scope>> &local_scopes,
+    const bool use_cuda,
 #ifdef PADDLE_WITH_CUDA
     const BuildStrategy &strategy, platform::NCCLContextMap *nccl_ctxs) {
 #else
@@ -66,8 +67,8 @@ std::unique_ptr<ir::Graph> ApplyParallelExecutorPass(
       &loss_var_name);
   multi_devices_pass->SetNotOwned<const std::unordered_set<std::string>>(
       "params", &param_names);
-  multi_devices_pass->SetNotOwned<const std::vector<Scope *>>("local_scopes",
-                                                              &local_scopes);
+  multi_devices_pass->SetNotOwned<const std::vector<std::shared_ptr<Scope>>>(
+      "local_scopes", &local_scopes);
   multi_devices_pass->SetNotOwned<const BuildStrategy>("strategy", &strategy);
 
 #ifdef PADDLE_WITH_CUDA
@@ -100,8 +101,8 @@ class ParallelExecutorPrivate {
       : places_(places) {}
 
   std::vector<platform::Place> places_;
-  std::vector<Scope *> local_scopes_;
-  Scope *global_scope_;
+  std::vector<std::shared_ptr<Scope>> local_scopes_;
+  std::shared_ptr<Scope> global_scope_;
   std::unique_ptr<details::SSAGraphExecutor> executor_;
 
 #ifdef PADDLE_WITH_CUDA
@@ -112,7 +113,7 @@ class ParallelExecutorPrivate {
   bool use_all_reduce_;
 };
 
-std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
+std::vector<std::shared_ptr<Scope>> &ParallelExecutor::GetLocalScopes() {
   return member_->local_scopes_;
 }
 
@@ -121,7 +122,8 @@ ParallelExecutor::ParallelExecutor(
     const std::unordered_set<std::string> &params,
     const std::unordered_set<std::string> &bcast_vars,
     const ProgramDesc &main_program, const std::string &loss_var_name,
-    Scope *scope, const std::vector<Scope *> &local_scopes,
+    const std::shared_ptr<Scope> &scope,
+    const std::vector<std::shared_ptr<Scope>> &local_scopes,
     const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy,
     size_t num_trainers, size_t trainer_id)
     : member_(new ParallelExecutorPrivate(places)) {
@@ -142,13 +144,13 @@ ParallelExecutor::ParallelExecutor(
     member_->own_local_scope_ = true;
     member_->local_scopes_.emplace_back(member_->global_scope_);
     for (size_t i = 1; i < member_->places_.size(); ++i) {
-      member_->local_scopes_.emplace_back(&scope->NewScope());
+      member_->local_scopes_.emplace_back(scope->NewSharedScope());
     }
   } else {
     member_->own_local_scope_ = false;
     PADDLE_ENFORCE_EQ(member_->places_.size(), local_scopes.size());
     for (size_t i = 0; i < member_->places_.size(); ++i) {
-      member_->local_scopes_.emplace_back(&local_scopes[i]->NewScope());
+      member_->local_scopes_.emplace_back(local_scopes[i]->NewSharedScope());
     }
   }
 
@@ -321,7 +323,7 @@ void ParallelExecutor::FeedTensorsIntoLocalScopes(
 
   for (size_t i = 0; i < tensors.size(); ++i) {
     auto &map = tensors[i];
-    auto *scope = member_->local_scopes_[i];
+    auto &scope = member_->local_scopes_[i];
     for (auto &pair : map) {
       auto *trg = scope->Var(pair.first)->GetMutable<LoDTensor>();
       trg->ShareDataWith(pair.second);
@@ -351,8 +353,15 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes(
 
 ParallelExecutor::~ParallelExecutor() {
   if (member_->own_local_scope_) {
+    std::vector<Scope *> local_scopes_ptrs;
+    local_scopes_ptrs.reserve(member_->local_scopes_.size());
     for (size_t i = 1; i < member_->local_scopes_.size(); ++i) {
-      member_->global_scope_->DeleteScope(member_->local_scopes_[i]);
+      local_scopes_ptrs.emplace_back(member_->local_scopes_[i].get());
+      member_->local_scopes_[i].reset();
+    }
+
+    for (size_t i = 0; i != local_scopes_ptrs.size(); ++i) {
+      member_->global_scope_->DeleteScope(local_scopes_ptrs[i]);
     }
   }
 }
diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h
index 5fb748fa205d5e9dbd2943b615c69aedd0e7a26f..ce1076e44b9c9ac961ca8fed175b32a1c9bede81 100644
--- a/paddle/fluid/framework/parallel_executor.h
+++ b/paddle/fluid/framework/parallel_executor.h
@@ -39,19 +39,20 @@ class ParallelExecutor {
   DISABLE_COPY_AND_ASSIGN(ParallelExecutor);
 
  public:
-  explicit ParallelExecutor(const std::vector<platform::Place> &places,
-                            const std::unordered_set<std::string> &params,
-                            const std::unordered_set<std::string> &bcast_vars,
-                            const ProgramDesc &main_program,
-                            const std::string &loss_var_name, Scope *scope,
-                            const std::vector<Scope *> &local_scopes,
-                            const ExecutionStrategy &exec_strategy,
-                            const BuildStrategy &build_strategy,
-                            size_t num_trainers = 1, size_t trainer_id = 0);
+  explicit ParallelExecutor(
+      const std::vector<platform::Place> &places,
+      const std::unordered_set<std::string> &params,
+      const std::unordered_set<std::string> &bcast_vars,
+      const ProgramDesc &main_program, const std::string &loss_var_name,
+      const std::shared_ptr<Scope> &scope,
+      const std::vector<std::shared_ptr<Scope>> &local_scopes,
+      const ExecutionStrategy &exec_strategy,
+      const BuildStrategy &build_strategy, size_t num_trainers = 1,
+      size_t trainer_id = 0);
 
   ~ParallelExecutor();
 
-  std::vector<Scope *> &GetLocalScopes();
+  std::vector<std::shared_ptr<Scope>> &GetLocalScopes();
 
   /**
    * Feed tensors to local scopes. The size of tensors should be equal to the
diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc
index 50f374e3703a97f6c1fdb4b14fdeb0b603f9ac86..fa6bf4429d714f8b264edc6924362aae1e762aef 100644
--- a/paddle/fluid/framework/scope.cc
+++ b/paddle/fluid/framework/scope.cc
@@ -38,8 +38,8 @@ Scope::~Scope() { DropKids(); }
 
 Scope& Scope::NewScope() const {
   std::unique_lock<std::mutex> lock(mutex_);
-  kids_.push_back(new Scope(this));
-  return *kids_.back();
+  kids_.push_back(std::shared_ptr<Scope>(new Scope(this)));
+  return *kids_.back().get();
 }
 
 Variable* Scope::Var(const std::string& name) {
@@ -68,7 +68,6 @@ const Scope* Scope::FindScope(const Variable* var) const {
 
 void Scope::DropKids() {
   std::unique_lock<std::mutex> lock(mutex_);
-  for (Scope* s : kids_) delete s;
   kids_.clear();
 }
 
@@ -84,8 +83,12 @@ std::vector<std::string> Scope::LocalVarNames() const {
 
 void Scope::DeleteScope(Scope* scope) const {
   std::unique_lock<std::mutex> lock(mutex_);
-  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
+  auto it = std::find_if(this->kids_.begin(), this->kids_.end(),
+                         [&scope](const std::shared_ptr<Scope>& kid) {
+                           return kid.get() == scope;
+                         });
   PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
+  it->reset();
   this->kids_.erase(it);
   // When making memory benchmark on Fluid, we have to delete scope sync.
   if (FLAGS_benchmark || FLAGS_eager_delete_scope) {
diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h
index e246241c0abfbc7bdcaf38d073cc58fc36a4f737..0ba5d34798d757fc6a7a9950db43ea2115b5976b 100644
--- a/paddle/fluid/framework/scope.h
+++ b/paddle/fluid/framework/scope.h
@@ -105,7 +105,7 @@ class Scope {
   Variable* FindVarLocally(const std::string& name) const;
 
   // Scope in `kids_` are owned by this class.
-  mutable std::list<Scope*> kids_;
+  mutable std::list<std::shared_ptr<Scope>> kids_;
   Scope const* parent_{nullptr};
 
   DISABLE_COPY_AND_ASSIGN(Scope);