diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
index a9b89614aea3ec5c233827762e3deed3e9fdd3b1..7606f2bc06b2ecf07c5649eeae1a2d5587a8880c 100644
--- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
@@ -22,8 +22,7 @@ namespace framework {
 namespace details {
 
 FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor(
-    const ExecutionStrategy &strategy,
-    const std::vector<std::shared_ptr<Scope>> &local_scopes,
+    const ExecutionStrategy &strategy, const std::vector<Scope *> &local_scopes,
     const std::vector<platform::Place> &places,
     std::unique_ptr<ir::Graph> &&graph)
     : strategy_(strategy),
diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
index fb615d70b7a3407963cb7db7c696c9ed0330d036..dad3a231cba6402f57ba654a9ac5fb520b9c8f04 100644
--- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
@@ -29,17 +29,16 @@ namespace details {
 
 class OpHandleBase;
 class FastThreadedSSAGraphExecutor : public SSAGraphExecutor {
  public:
-  FastThreadedSSAGraphExecutor(
-      const ExecutionStrategy &strategy,
-      const std::vector<std::shared_ptr<Scope>> &local_scopes,
-      const std::vector<platform::Place> &places,
-      std::unique_ptr<ir::Graph> &&graph);
+  FastThreadedSSAGraphExecutor(const ExecutionStrategy &strategy,
+                               const std::vector<Scope *> &local_scopes,
+                               const std::vector<platform::Place> &places,
+                               std::unique_ptr<ir::Graph> &&graph);
   FeedFetchList Run(const std::vector<std::string> &fetch_tensors) override;
   const ir::Graph &Graph() const override;
 
  private:
   ExecutionStrategy strategy_;
-  std::vector<std::shared_ptr<Scope>> local_scopes_;
+  std::vector<Scope *> local_scopes_;
   std::vector<platform::Place> places_;
   std::unique_ptr<ir::Graph> graph_;
diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc
index 2f4aefd39ddcc1c3c6903de64c525e934fc68e52..fe18b2060c5cd7e157374da53c5a985f70545ab7 100644
--- a/paddle/fluid/framework/details/fetch_op_handle.cc
+++ b/paddle/fluid/framework/details/fetch_op_handle.cc
@@ -22,7 +22,7 @@ namespace framework {
 namespace details {
 
 FetchOpHandle::FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset,
-                             std::vector<std::shared_ptr<Scope>> *local_scopes)
+                             std::vector<Scope *> *local_scopes)
     : OpHandleBase(node),
       data_(data),
       offset_(offset),
diff --git a/paddle/fluid/framework/details/fetch_op_handle.h b/paddle/fluid/framework/details/fetch_op_handle.h
index a207e36b8ac046acb1da7e3798de7d1b6ff718ff..6ce42f92d7f1e81eeafd1eb5c28ce3564a5ffebc 100644
--- a/paddle/fluid/framework/details/fetch_op_handle.h
+++ b/paddle/fluid/framework/details/fetch_op_handle.h
@@ -29,7 +29,7 @@ namespace details {
 struct FetchOpHandle : public OpHandleBase {
  public:
   FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset,
-                std::vector<std::shared_ptr<Scope>> *local_scopes);
+                std::vector<Scope *> *local_scopes);
 
   ~FetchOpHandle();
 
@@ -47,7 +47,7 @@ struct FetchOpHandle : public OpHandleBase {
  private:
   FeedFetchList *data_;
   size_t offset_;
-  std::vector<std::shared_ptr<Scope>> *local_scopes_;
+  std::vector<Scope *> *local_scopes_;
   std::vector<LoDTensor> tensors_;
 };
diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
index bf5671c679722103f9bbea4360bcbc4b2ed917d2..5bd974d6b789a2f085c0a69de5e133187342f587 100644
--- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
@@ -23,8 +23,7 @@
 namespace paddle {
 namespace framework {
 namespace details {
 ScopeBufferedSSAGraphExecutor::ScopeBufferedSSAGraphExecutor(
-    ExecutionStrategy strategy,
-    std::vector<std::shared_ptr<Scope>> local_scopes,
+    ExecutionStrategy strategy, std::vector<Scope *> local_scopes,
     std::vector<VariableInfo> var_infos, std::vector<platform::Place> places,
     std::unique_ptr<SSAGraphExecutor> &&underlying_executor)
     : strategy_(std::move(strategy)),
diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h
index ec31755af5d6db7b91136e2d188ed5ae6134f5e0..5e87e0bf50b51d2b630aba06a5907dd721754d1f 100644
--- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h
@@ -37,8 +37,7 @@ struct VariableInfo {
 class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor {
  public:
   ScopeBufferedSSAGraphExecutor(
-      ExecutionStrategy strategy,
-      std::vector<std::shared_ptr<Scope>> local_scopes,
+      ExecutionStrategy strategy, std::vector<Scope *> local_scopes,
       std::vector<VariableInfo> var_infos, std::vector<platform::Place> places,
       std::unique_ptr<SSAGraphExecutor>&& underlying_executor);
 
@@ -53,7 +52,7 @@ class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor {
   ExecutionStrategy strategy_;
   std::unique_ptr<SSAGraphExecutor> underlying_executor_;
 
-  std::vector<std::shared_ptr<Scope>> local_scopes_;
+  std::vector<Scope *> local_scopes_;
   std::vector<VariableInfo> var_infos_;
   std::vector<platform::Place> places_;
 };
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
index cc6f44436309ad3249960aa64e031e604a103344..c9e331ef359f853263f8dad38dd0a2be4d9618ad 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
@@ -21,8 +21,7 @@ namespace paddle {
 namespace framework {
 namespace details {
 ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor(
-    const ExecutionStrategy &strategy,
-    const std::vector<std::shared_ptr<Scope>> &local_scopes,
+    const ExecutionStrategy &strategy, const std::vector<Scope *> &local_scopes,
     const std::vector<platform::Place> &places,
     std::unique_ptr<ir::Graph> &&graph)
     : graph_(std::move(graph)),
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
index 2a74af6c3d0a62b0a7f60a58babbd6263a2bff70..9135c1f5d435d5e2c60eb90c80803361aa31a3c4 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
@@ -38,11 +38,10 @@ namespace details {
 
 class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
  public:
-  ThreadedSSAGraphExecutor(
-      const ExecutionStrategy &strategy,
-      const std::vector<std::shared_ptr<Scope>> &local_scopes,
-      const std::vector<platform::Place> &places,
-      std::unique_ptr<ir::Graph> &&graph);
+  ThreadedSSAGraphExecutor(const ExecutionStrategy &strategy,
+                           const std::vector<Scope *> &local_scopes,
+                           const std::vector<platform::Place> &places,
+                           std::unique_ptr<ir::Graph> &&graph);
 
   const ir::Graph &Graph() const override { return *graph_; }
   // Run a SSAGraph by a thread pool
@@ -58,7 +57,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
  private:
   std::unique_ptr<ir::Graph> graph_;
   std::unique_ptr<::ThreadPool> pool_;
-  std::vector<std::shared_ptr<Scope>> local_scopes_;
+  std::vector<Scope *> local_scopes_;
   std::vector<platform::Place> places_;
   platform::DeviceContextPool fetch_ctxs_;
   ExceptionHolder exception_holder_;
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index 93c74deb3e96cad97121a23fe244c76593f96243..5b8c75a93de2ddd8f7260d2191c22a5945b3d2d9 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -39,8 +39,7 @@ std::unique_ptr<ir::Graph> ApplyParallelExecutorPass(
     const ProgramDesc &main_program, const std::vector<platform::Place> &places,
     const std::string &loss_var_name,
     const std::unordered_set<std::string> &param_names,
-    const std::vector<std::shared_ptr<Scope>> &local_scopes,
-    const bool use_cuda,
+    const std::vector<Scope *> &local_scopes, const bool use_cuda,
 #ifdef PADDLE_WITH_CUDA
     const BuildStrategy &strategy, platform::NCCLContextMap *nccl_ctxs) {
 #else
@@ -67,8 +66,8 @@ std::unique_ptr<ir::Graph> ApplyParallelExecutorPass(
                                                      &loss_var_name);
   multi_devices_pass->SetNotOwned<const std::unordered_set<std::string>>(
       "params", &param_names);
-  multi_devices_pass->SetNotOwned<const std::vector<std::shared_ptr<Scope>>>(
-      "local_scopes", &local_scopes);
+  multi_devices_pass->SetNotOwned<const std::vector<Scope *>>("local_scopes",
+                                                              &local_scopes);
   multi_devices_pass->SetNotOwned<const BuildStrategy>("strategy", &strategy);
 
 #ifdef PADDLE_WITH_CUDA
@@ -101,8 +100,8 @@ class ParallelExecutorPrivate {
       : places_(places) {}
 
   std::vector<platform::Place> places_;
-  std::vector<std::shared_ptr<Scope>> local_scopes_;
-  std::shared_ptr<Scope> global_scope_;
+  std::vector<Scope *> local_scopes_;
+  Scope *global_scope_;
   std::unique_ptr<details::SSAGraphExecutor> executor_;
 
 #ifdef PADDLE_WITH_CUDA
@@ -113,7 +112,7 @@ class ParallelExecutorPrivate {
   bool use_all_reduce_;
 };
 
-std::vector<std::shared_ptr<Scope>> &ParallelExecutor::GetLocalScopes() {
+std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
   return member_->local_scopes_;
 }
 
@@ -122,8 +121,7 @@ ParallelExecutor::ParallelExecutor(
     const std::unordered_set<std::string> &params,
     const std::unordered_set<std::string> &bcast_vars,
     const ProgramDesc &main_program, const std::string &loss_var_name,
-    const std::shared_ptr<Scope> &scope,
-    const std::vector<std::shared_ptr<Scope>> &local_scopes,
+    Scope *scope, const std::vector<Scope *> &local_scopes,
     const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy,
     size_t num_trainers, size_t trainer_id)
     : member_(new ParallelExecutorPrivate(places)) {
@@ -144,13 +142,13 @@ ParallelExecutor::ParallelExecutor(
     member_->own_local_scope_ = true;
     member_->local_scopes_.emplace_back(member_->global_scope_);
     for (size_t i = 1; i < member_->places_.size(); ++i) {
-      member_->local_scopes_.emplace_back(scope->NewSharedScope());
+      member_->local_scopes_.emplace_back(&scope->NewScope());
     }
   } else {
     member_->own_local_scope_ = false;
     PADDLE_ENFORCE_EQ(member_->places_.size(), local_scopes.size());
     for (size_t i = 0; i < member_->places_.size(); ++i) {
-      member_->local_scopes_.emplace_back(local_scopes[i]->NewSharedScope());
+      member_->local_scopes_.emplace_back(&local_scopes[i]->NewScope());
     }
   }
 
@@ -323,7 +321,7 @@ void ParallelExecutor::FeedTensorsIntoLocalScopes(
   for (size_t i = 0; i < tensors.size(); ++i) {
     auto &map = tensors[i];
-    auto &scope = member_->local_scopes_[i];
+    auto *scope = member_->local_scopes_[i];
     for (auto &pair : map) {
       auto *trg = scope->Var(pair.first)->GetMutable<LoDTensor>();
       trg->ShareDataWith(pair.second);
@@ -353,15 +351,11 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes(
 
 ParallelExecutor::~ParallelExecutor() {
   if (member_->own_local_scope_) {
-    std::vector<Scope *> local_scopes_ptrs;
-    local_scopes_ptrs.reserve(member_->local_scopes_.size());
     for (size_t i = 1; i < member_->local_scopes_.size(); ++i) {
-      local_scopes_ptrs.emplace_back(member_->local_scopes_[i].get());
-      member_->local_scopes_[i].reset();
-    }
-
-    for (size_t i = 0; i != local_scopes_ptrs.size(); ++i) {
-      member_->global_scope_->DeleteScope(local_scopes_ptrs[i]);
+      Scope *local_scope = member_->local_scopes_[i];
+      if (member_->global_scope_->HasKid(local_scope)) {
+        member_->global_scope_->DeleteScope(local_scope);
+      }
     }
   }
 }
diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h
index ce1076e44b9c9ac961ca8fed175b32a1c9bede81..5fb748fa205d5e9dbd2943b615c69aedd0e7a26f 100644
--- a/paddle/fluid/framework/parallel_executor.h
+++ b/paddle/fluid/framework/parallel_executor.h
@@ -39,20 +39,19 @@ class ParallelExecutor {
   DISABLE_COPY_AND_ASSIGN(ParallelExecutor);
 
  public:
-  explicit ParallelExecutor(
-      const std::vector<platform::Place> &places,
-      const std::unordered_set<std::string> &params,
-      const std::unordered_set<std::string> &bcast_vars,
-      const ProgramDesc &main_program, const std::string &loss_var_name,
-      const std::shared_ptr<Scope> &scope,
-      const std::vector<std::shared_ptr<Scope>> &local_scopes,
-      const ExecutionStrategy &exec_strategy,
-      const BuildStrategy &build_strategy, size_t num_trainers = 1,
-      size_t trainer_id = 0);
+  explicit ParallelExecutor(const std::vector<platform::Place> &places,
+                            const std::unordered_set<std::string> &params,
+                            const std::unordered_set<std::string> &bcast_vars,
+                            const ProgramDesc &main_program,
+                            const std::string &loss_var_name, Scope *scope,
+                            const std::vector<Scope *> &local_scopes,
+                            const ExecutionStrategy &exec_strategy,
+                            const BuildStrategy &build_strategy,
+                            size_t num_trainers = 1, size_t trainer_id = 0);
 
   ~ParallelExecutor();
 
-  std::vector<std::shared_ptr<Scope>> &GetLocalScopes();
+  std::vector<Scope *> &GetLocalScopes();
 
   /**
    * Feed tensors to local scopes. The size of tensors should be equal to the
diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc
index fa6bf4429d714f8b264edc6924362aae1e762aef..2be655b89a4caf2bf9874dcab6bc0bdb2856a026 100644
--- a/paddle/fluid/framework/scope.cc
+++ b/paddle/fluid/framework/scope.cc
@@ -38,8 +38,8 @@ Scope::~Scope() { DropKids(); }
 
 Scope& Scope::NewScope() const {
   std::unique_lock<std::mutex> lock(mutex_);
-  kids_.push_back(std::shared_ptr<Scope>(new Scope(this)));
-  return *kids_.back().get();
+  kids_.push_back(new Scope(this));
+  return *kids_.back();
 }
 
 Variable* Scope::Var(const std::string& name) {
@@ -68,9 +68,16 @@ const Scope* Scope::FindScope(const Variable* var) const {
 
 void Scope::DropKids() {
   std::unique_lock<std::mutex> lock(mutex_);
+  for (Scope* s : kids_) delete s;
   kids_.clear();
 }
 
+bool Scope::HasKid(const Scope* scope) const {
+  std::unique_lock<std::mutex> lock(mutex_);
+  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
+  return it != this->kids_.end();
+}
+
 std::vector<std::string> Scope::LocalVarNames() const {
   std::unique_lock<std::mutex> lock(mutex_);
   std::vector<std::string> known_vars;
@@ -83,12 +90,8 @@ std::vector<std::string> Scope::LocalVarNames() const {
 
 void Scope::DeleteScope(Scope* scope) const {
   std::unique_lock<std::mutex> lock(mutex_);
-  auto it = std::find_if(this->kids_.begin(), this->kids_.end(),
-                         [&scope](const std::shared_ptr<Scope>& kid) {
-                           return kid.get() == scope;
-                         });
+  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
   PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
-  it->reset();
   this->kids_.erase(it);
   // When making memory benchmark on Fluid, we have to delete scope sync.
   if (FLAGS_benchmark || FLAGS_eager_delete_scope) {
diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h
index 0ba5d34798d757fc6a7a9950db43ea2115b5976b..b6165a595d537c314a95685e8b1edbc42e387ab7 100644
--- a/paddle/fluid/framework/scope.h
+++ b/paddle/fluid/framework/scope.h
@@ -71,6 +71,9 @@ class Scope {
   /// Drop all kids scopes belonged to this scope.
   void DropKids();
 
+  /// Find if a scope exists in the kid scopes
+  bool HasKid(const Scope* scope) const;
+
   // enumerate all the variables current contains.
   std::vector<std::string> LocalVarNames() const;
 
@@ -105,7 +108,7 @@ class Scope {
   Variable* FindVarLocally(const std::string& name) const;
 
   // Scope in `kids_` are owned by this class.
-  mutable std::list<std::shared_ptr<Scope>> kids_;
+  mutable std::list<Scope*> kids_;
   Scope const* parent_{nullptr};
 
   DISABLE_COPY_AND_ASSIGN(Scope);
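Editor's note: taken together, the scope.h/scope.cc hunks above replace the `std::shared_ptr<Scope>` kid list with raw pointers that the parent scope owns outright, and add `HasKid()` so callers can check membership before asking the parent to delete a child. The sketch below is a minimal standalone model of that ownership contract, not Paddle's real class: the parent pointer, the variable map, and the deferred deletion behind `FLAGS_benchmark` are omitted, and `PADDLE_ENFORCE` is replaced by `assert`.

```cpp
// Simplified standalone model of the raw-pointer Scope ownership in this
// patch. Assumes C++11 and no Paddle dependencies; deletion is synchronous
// here, whereas the real DeleteScope may defer it.
#include <algorithm>
#include <cassert>
#include <list>
#include <mutex>

class Scope {
 public:
  Scope() = default;
  ~Scope() { DropKids(); }  // the parent owns kids_, so it frees them

  Scope &NewScope() {
    std::lock_guard<std::mutex> lock(mutex_);
    kids_.push_back(new Scope);  // raw pointer, owned by this scope
    return *kids_.back();
  }

  bool HasKid(const Scope *scope) const {
    std::lock_guard<std::mutex> lock(mutex_);
    return std::find(kids_.begin(), kids_.end(), scope) != kids_.end();
  }

  void DeleteScope(Scope *scope) {
    std::lock_guard<std::mutex> lock(mutex_);
    auto it = std::find(kids_.begin(), kids_.end(), scope);
    assert(it != kids_.end() && "cannot find kid scope");
    kids_.erase(it);  // unlink first, so DropKids() never sees it again
    delete scope;
  }

  void DropKids() {
    std::lock_guard<std::mutex> lock(mutex_);
    for (Scope *s : kids_) delete s;
    kids_.clear();
  }

 private:
  mutable std::mutex mutex_;
  std::list<Scope *> kids_;  // owned raw pointers, as in the patch
};
```

With raw ownership, "is this scope still a kid?" becomes an explicit, checkable question instead of being implied by shared reference counts, which is what makes the `HasKid()` guard in `~ParallelExecutor` possible.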
diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py
index e5ae95e2d943917b9bc10f0d4c4bdc5f8fb07fdb..de276755bb1eb2746cc780575a40357255223809 100644
--- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py
+++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py
@@ -178,7 +178,4 @@ if __name__ == '__main__':
         for parallel in (False, True):
             if use_cuda and not core.is_compiled_with_cuda():
                 continue
-            # TODO(minqiyang): remove this line after fixing the deletion
-            # order problem of Scope in ParallelExecutor in manylinux
-            if six.PY2:
-                main(use_cuda=use_cuda, parallel=parallel)
+            main(use_cuda=use_cuda, parallel=parallel)
diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py
index ff91be72c918f8dac65b7030e45c4a00deb965ac..dd547f3448ae55c07b6c09f9de4ac08d8ec5ee88 100644
--- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py
+++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py
@@ -152,7 +152,4 @@ if __name__ == '__main__':
         for parallel in (False, True):
             if use_cuda and not core.is_compiled_with_cuda():
                 continue
-            # TODO(minqiyang): remove this line after fixing the deletion
-            # order problem of Scope in ParallelExecutor in manylinux
-            if six.PY2:
-                main(use_cuda=use_cuda, parallel=parallel)
+            main(use_cuda=use_cuda, parallel=parallel)
diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py
index fa72c939e57356f26d60032dd0a91c894b28c505..973308498bec3cddde2ef651751ad5d0c9f84503 100644
--- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py
+++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py
@@ -155,7 +155,4 @@ if __name__ == '__main__':
         for parallel in (False, True):
             if use_cuda and not core.is_compiled_with_cuda():
                 continue
-            # TODO(minqiyang): remove this line after fixing the deletion
-            # order problem of Scope in ParallelExecutor in manylinux
-            if six.PY2:
-                main(use_cuda=use_cuda, parallel=parallel)
+            main(use_cuda=use_cuda, parallel=parallel)
diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py
index 440d2a30835cb89089709f024a4dcc6e4113efa8..cb4aeb430e1a9662a183084c0cdacc41c5a8ec11 100644
--- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py
+++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py
@@ -137,7 +137,4 @@ if __name__ == '__main__':
         for parallel in (False, True):
             if use_cuda and not core.is_compiled_with_cuda():
                 continue
-            # TODO(minqiyang): remove this line after fixing the deletion
-            # order problem of Scope in ParallelExecutor in manylinux
-            if six.PY2:
-                main(use_cuda=use_cuda, parallel=parallel)
+            main(use_cuda=use_cuda, parallel=parallel)
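Editor's note: the Python hunks drop the PY2-only guard because the `~ParallelExecutor` change above makes teardown order-safe: a local scope is deleted through its parent only if the parent still owns it. As a usage note, here is a hypothetical walk-through against the `Scope` sketch above (not Paddle's API; `main` and the scope names are illustrative only), mirroring the `HasKid` guard the new destructor uses.

```cpp
// Hypothetical teardown sequence using the Scope sketch from the earlier
// note. It mirrors ~ParallelExecutor: if the global scope has already
// dropped its kids (e.g. it was destroyed first at shutdown), the cached
// local-scope pointers are stale and must not be deleted a second time;
// HasKid() filters them out.
#include <vector>

int main() {
  Scope global;
  std::vector<Scope *> local_scopes;
  local_scopes.push_back(&global);             // scope 0 is the global scope
  local_scopes.push_back(&global.NewScope());  // extra per-place scopes
  local_scopes.push_back(&global.NewScope());

  // Executor-style cleanup: skip index 0 (not a kid), and only delete
  // children the parent still owns.
  for (size_t i = 1; i < local_scopes.size(); ++i) {
    if (global.HasKid(local_scopes[i])) {
      global.DeleteScope(local_scopes[i]);
    }
  }
  return 0;
}
```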