From dc863aac7edeccbe8362d625b2c1e6eeca885000 Mon Sep 17 00:00:00 2001 From: minqiyang Date: Mon, 10 Sep 2018 14:29:19 +0800 Subject: [PATCH] Add kids exists detection in Scope --- .../fast_threaded_ssa_graph_executor.cc | 3 +- .../fast_threaded_ssa_graph_executor.h | 11 +++--- .../framework/details/fetch_op_handle.cc | 2 +- .../fluid/framework/details/fetch_op_handle.h | 4 +-- .../scope_buffered_ssa_graph_executor.cc | 3 +- .../scope_buffered_ssa_graph_executor.h | 5 ++- .../details/threaded_ssa_graph_executor.cc | 3 +- .../details/threaded_ssa_graph_executor.h | 11 +++--- paddle/fluid/framework/parallel_executor.cc | 34 ++++++++----------- paddle/fluid/framework/parallel_executor.h | 21 ++++++------ paddle/fluid/framework/scope.cc | 17 ++++++---- paddle/fluid/framework/scope.h | 5 ++- .../test_image_classification_resnet.py | 5 +-- .../test_image_classification_vgg.py | 5 +-- .../test_recognize_digits_conv.py | 5 +-- .../test_recognize_digits_mlp.py | 5 +-- 16 files changed, 60 insertions(+), 79 deletions(-) diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc index a9b89614ae..7606f2bc06 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc @@ -22,8 +22,7 @@ namespace framework { namespace details { FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor( - const ExecutionStrategy &strategy, - const std::vector> &local_scopes, + const ExecutionStrategy &strategy, const std::vector &local_scopes, const std::vector &places, std::unique_ptr &&graph) : strategy_(strategy), diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h index fb615d70b7..dad3a231cb 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h @@ -29,17 +29,16 @@ namespace details { class OpHandleBase; class FastThreadedSSAGraphExecutor : public SSAGraphExecutor { public: - FastThreadedSSAGraphExecutor( - const ExecutionStrategy &strategy, - const std::vector> &local_scopes, - const std::vector &places, - std::unique_ptr &&graph); + FastThreadedSSAGraphExecutor(const ExecutionStrategy &strategy, + const std::vector &local_scopes, + const std::vector &places, + std::unique_ptr &&graph); FeedFetchList Run(const std::vector &fetch_tensors) override; const ir::Graph &Graph() const override; private: ExecutionStrategy strategy_; - std::vector> local_scopes_; + std::vector local_scopes_; std::vector places_; std::unique_ptr graph_; diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index 2f4aefd39d..fe18b2060c 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -22,7 +22,7 @@ namespace framework { namespace details { FetchOpHandle::FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset, - std::vector> *local_scopes) + std::vector *local_scopes) : OpHandleBase(node), data_(data), offset_(offset), diff --git a/paddle/fluid/framework/details/fetch_op_handle.h b/paddle/fluid/framework/details/fetch_op_handle.h index a207e36b8a..6ce42f92d7 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.h +++ b/paddle/fluid/framework/details/fetch_op_handle.h @@ -29,7 +29,7 @@ namespace details { struct FetchOpHandle : public OpHandleBase { public: FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset, - std::vector> *local_scopes); + std::vector *local_scopes); ~FetchOpHandle(); @@ -47,7 +47,7 @@ struct FetchOpHandle : public OpHandleBase { private: FeedFetchList *data_; size_t offset_; - std::vector> *local_scopes_; + std::vector *local_scopes_; std::vector tensors_; }; diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc index bf5671c679..5bd974d6b7 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc @@ -23,8 +23,7 @@ namespace paddle { namespace framework { namespace details { ScopeBufferedSSAGraphExecutor::ScopeBufferedSSAGraphExecutor( - ExecutionStrategy strategy, - std::vector> local_scopes, + ExecutionStrategy strategy, std::vector local_scopes, std::vector var_infos, std::vector places, std::unique_ptr &&underlying_executor) : strategy_(std::move(strategy)), diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h index ec31755af5..5e87e0bf50 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h @@ -37,8 +37,7 @@ struct VariableInfo { class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor { public: ScopeBufferedSSAGraphExecutor( - ExecutionStrategy strategy, - std::vector> local_scopes, + ExecutionStrategy strategy, std::vector local_scopes, std::vector var_infos, std::vector places, std::unique_ptr&& underlying_executor); @@ -53,7 +52,7 @@ class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor { ExecutionStrategy strategy_; std::unique_ptr underlying_executor_; - std::vector> local_scopes_; + std::vector local_scopes_; std::vector var_infos_; std::vector places_; }; diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index cc6f444363..c9e331ef35 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -21,8 +21,7 @@ namespace paddle { namespace framework { namespace details { ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor( - const ExecutionStrategy &strategy, - const std::vector> &local_scopes, + const ExecutionStrategy &strategy, const std::vector &local_scopes, const std::vector &places, std::unique_ptr &&graph) : graph_(std::move(graph)), diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index 2a74af6c3d..9135c1f5d4 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -38,11 +38,10 @@ namespace details { class ThreadedSSAGraphExecutor : public SSAGraphExecutor { public: - ThreadedSSAGraphExecutor( - const ExecutionStrategy &strategy, - const std::vector> &local_scopes, - const std::vector &places, - std::unique_ptr &&graph); + ThreadedSSAGraphExecutor(const ExecutionStrategy &strategy, + const std::vector &local_scopes, + const std::vector &places, + std::unique_ptr &&graph); const ir::Graph &Graph() const override { return *graph_; } // Run a SSAGraph by a thread pool @@ -58,7 +57,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { private: std::unique_ptr graph_; std::unique_ptr<::ThreadPool> pool_; - std::vector> local_scopes_; + std::vector local_scopes_; std::vector places_; platform::DeviceContextPool fetch_ctxs_; ExceptionHolder exception_holder_; diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 93c74deb3e..5b8c75a93d 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -39,8 +39,7 @@ std::unique_ptr ApplyParallelExecutorPass( const ProgramDesc &main_program, const std::vector &places, const std::string &loss_var_name, const std::unordered_set ¶m_names, - const std::vector> &local_scopes, - const bool use_cuda, + const std::vector &local_scopes, const bool use_cuda, #ifdef PADDLE_WITH_CUDA const BuildStrategy &strategy, platform::NCCLContextMap *nccl_ctxs) { #else @@ -67,8 +66,8 @@ std::unique_ptr ApplyParallelExecutorPass( &loss_var_name); multi_devices_pass->SetNotOwned>( "params", ¶m_names); - multi_devices_pass->SetNotOwned>>( - "local_scopes", &local_scopes); + multi_devices_pass->SetNotOwned>("local_scopes", + &local_scopes); multi_devices_pass->SetNotOwned("strategy", &strategy); #ifdef PADDLE_WITH_CUDA @@ -101,8 +100,8 @@ class ParallelExecutorPrivate { : places_(places) {} std::vector places_; - std::vector> local_scopes_; - std::shared_ptr global_scope_; + std::vector local_scopes_; + Scope *global_scope_; std::unique_ptr executor_; #ifdef PADDLE_WITH_CUDA @@ -113,7 +112,7 @@ class ParallelExecutorPrivate { bool use_all_reduce_; }; -std::vector> &ParallelExecutor::GetLocalScopes() { +std::vector &ParallelExecutor::GetLocalScopes() { return member_->local_scopes_; } @@ -122,8 +121,7 @@ ParallelExecutor::ParallelExecutor( const std::unordered_set ¶ms, const std::unordered_set &bcast_vars, const ProgramDesc &main_program, const std::string &loss_var_name, - const std::shared_ptr &scope, - const std::vector> &local_scopes, + Scope *scope, const std::vector &local_scopes, const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy, size_t num_trainers, size_t trainer_id) : member_(new ParallelExecutorPrivate(places)) { @@ -144,13 +142,13 @@ ParallelExecutor::ParallelExecutor( member_->own_local_scope_ = true; member_->local_scopes_.emplace_back(member_->global_scope_); for (size_t i = 1; i < member_->places_.size(); ++i) { - member_->local_scopes_.emplace_back(scope->NewSharedScope()); + member_->local_scopes_.emplace_back(&scope->NewScope()); } } else { member_->own_local_scope_ = false; PADDLE_ENFORCE_EQ(member_->places_.size(), local_scopes.size()); for (size_t i = 0; i < member_->places_.size(); ++i) { - member_->local_scopes_.emplace_back(local_scopes[i]->NewSharedScope()); + member_->local_scopes_.emplace_back(&local_scopes[i]->NewScope()); } } @@ -323,7 +321,7 @@ void ParallelExecutor::FeedTensorsIntoLocalScopes( for (size_t i = 0; i < tensors.size(); ++i) { auto &map = tensors[i]; - auto &scope = member_->local_scopes_[i]; + auto *scope = member_->local_scopes_[i]; for (auto &pair : map) { auto *trg = scope->Var(pair.first)->GetMutable(); trg->ShareDataWith(pair.second); @@ -353,15 +351,11 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes( ParallelExecutor::~ParallelExecutor() { if (member_->own_local_scope_) { - std::vector local_scopes_ptrs; - local_scopes_ptrs.reserve(member_->local_scopes_.size()); for (size_t i = 1; i < member_->local_scopes_.size(); ++i) { - local_scopes_ptrs.emplace_back(member_->local_scopes_[i].get()); - member_->local_scopes_[i].reset(); - } - - for (size_t i = 0; i != local_scopes_ptrs.size(); ++i) { - member_->global_scope_->DeleteScope(local_scopes_ptrs[i]); + Scope *local_scope = member_->local_scopes_[i]; + if (member_->global_scope_->HasKid(local_scope)) { + member_->global_scope_->DeleteScope(local_scope); + } } } } diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index ce1076e44b..5fb748fa20 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -39,20 +39,19 @@ class ParallelExecutor { DISABLE_COPY_AND_ASSIGN(ParallelExecutor); public: - explicit ParallelExecutor( - const std::vector &places, - const std::unordered_set ¶ms, - const std::unordered_set &bcast_vars, - const ProgramDesc &main_program, const std::string &loss_var_name, - const std::shared_ptr &scope, - const std::vector> &local_scopes, - const ExecutionStrategy &exec_strategy, - const BuildStrategy &build_strategy, size_t num_trainers = 1, - size_t trainer_id = 0); + explicit ParallelExecutor(const std::vector &places, + const std::unordered_set ¶ms, + const std::unordered_set &bcast_vars, + const ProgramDesc &main_program, + const std::string &loss_var_name, Scope *scope, + const std::vector &local_scopes, + const ExecutionStrategy &exec_strategy, + const BuildStrategy &build_strategy, + size_t num_trainers = 1, size_t trainer_id = 0); ~ParallelExecutor(); - std::vector> &GetLocalScopes(); + std::vector &GetLocalScopes(); /** * Feed tensors to local scopes. The size of tensors should be equal to the diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index fa6bf4429d..2be655b89a 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -38,8 +38,8 @@ Scope::~Scope() { DropKids(); } Scope& Scope::NewScope() const { std::unique_lock lock(mutex_); - kids_.push_back(std::shared_ptr(new Scope(this))); - return kids_.back().get(); + kids_.push_back(new Scope(this)); + return *kids_.back(); } Variable* Scope::Var(const std::string& name) { @@ -68,9 +68,16 @@ const Scope* Scope::FindScope(const Variable* var) const { void Scope::DropKids() { std::unique_lock lock(mutex_); + for (Scope* s : kids_) delete s; kids_.clear(); } +bool Scope::HasKid(const Scope* scope) const { + std::unique_lock lock(mutex_); + auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); + return it != this->kids_.end(); +} + std::vector Scope::LocalVarNames() const { std::unique_lock lock(mutex_); std::vector known_vars; @@ -83,12 +90,8 @@ std::vector Scope::LocalVarNames() const { void Scope::DeleteScope(Scope* scope) const { std::unique_lock lock(mutex_); - auto it = std::find_if(this->kids_.begin(), this->kids_.end(), - [&scope](const std::shared_ptr& kid) { - return kid.get() == scope; - }); + auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); - it->reset(); this->kids_.erase(it); // When making memory benchmark on Fluid, we have to delete scope sync. if (FLAGS_benchmark || FLAGS_eager_delete_scope) { diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index 0ba5d34798..b6165a595d 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -71,6 +71,9 @@ class Scope { /// Drop all kids scopes belonged to this scope. void DropKids(); + /// Find if a scope exists in the kid scopes + bool HasKid(const Scope* scope) const; + // enumerate all the variables current contains. std::vector LocalVarNames() const; @@ -105,7 +108,7 @@ class Scope { Variable* FindVarLocally(const std::string& name) const; // Scope in `kids_` are owned by this class. - mutable std::list> kids_; + mutable std::list kids_; Scope const* parent_{nullptr}; DISABLE_COPY_AND_ASSIGN(Scope); diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py index e5ae95e2d9..de276755bb 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py @@ -178,7 +178,4 @@ if __name__ == '__main__': for parallel in (False, True): if use_cuda and not core.is_compiled_with_cuda(): continue - # TODO(minqiyang): remove this line after fixing the deletion - # order problem of Scope in ParallelExecutor in manylinux - if six.PY2: - main(use_cuda=use_cuda, parallel=parallel) + main(use_cuda=use_cuda, parallel=parallel) diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py index ff91be72c9..dd547f3448 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py @@ -152,7 +152,4 @@ if __name__ == '__main__': for parallel in (False, True): if use_cuda and not core.is_compiled_with_cuda(): continue - # TODO(minqiyang): remove this line after fixing the deletion - # order problem of Scope in ParallelExecutor in manylinux - if six.PY2: - main(use_cuda=use_cuda, parallel=parallel) + main(use_cuda=use_cuda, parallel=parallel) diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py index fa72c939e5..973308498b 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py @@ -155,7 +155,4 @@ if __name__ == '__main__': for parallel in (False, True): if use_cuda and not core.is_compiled_with_cuda(): continue - # TODO(minqiyang): remove this line after fixing the deletion - # order problem of Scope in ParallelExecutor in manylinux - if six.PY2: - main(use_cuda=use_cuda, parallel=parallel) + main(use_cuda=use_cuda, parallel=parallel) diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py index 440d2a3083..cb4aeb430e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py @@ -137,7 +137,4 @@ if __name__ == '__main__': for parallel in (False, True): if use_cuda and not core.is_compiled_with_cuda(): continue - # TODO(minqiyang): remove this line after fixing the deletion - # order problem of Scope in ParallelExecutor in manylinux - if six.PY2: - main(use_cuda=use_cuda, parallel=parallel) + main(use_cuda=use_cuda, parallel=parallel) -- GitLab