提交 dc863aac 编写于 作者: M minqiyang

Add kids exists detection in Scope

上级 681514e1
......@@ -22,8 +22,7 @@ namespace framework {
namespace details {
FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor(
const ExecutionStrategy &strategy,
const std::vector<std::shared_ptr<Scope>> &local_scopes,
const ExecutionStrategy &strategy, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
std::unique_ptr<ir::Graph> &&graph)
: strategy_(strategy),
......
......@@ -29,9 +29,8 @@ namespace details {
class OpHandleBase;
class FastThreadedSSAGraphExecutor : public SSAGraphExecutor {
public:
FastThreadedSSAGraphExecutor(
const ExecutionStrategy &strategy,
const std::vector<std::shared_ptr<Scope>> &local_scopes,
FastThreadedSSAGraphExecutor(const ExecutionStrategy &strategy,
const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
std::unique_ptr<ir::Graph> &&graph);
FeedFetchList Run(const std::vector<std::string> &fetch_tensors) override;
......@@ -39,7 +38,7 @@ class FastThreadedSSAGraphExecutor : public SSAGraphExecutor {
private:
ExecutionStrategy strategy_;
std::vector<std::shared_ptr<Scope>> local_scopes_;
std::vector<Scope *> local_scopes_;
std::vector<platform::Place> places_;
std::unique_ptr<ir::Graph> graph_;
......
......@@ -22,7 +22,7 @@ namespace framework {
namespace details {
FetchOpHandle::FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset,
std::vector<std::shared_ptr<Scope>> *local_scopes)
std::vector<Scope *> *local_scopes)
: OpHandleBase(node),
data_(data),
offset_(offset),
......
......@@ -29,7 +29,7 @@ namespace details {
struct FetchOpHandle : public OpHandleBase {
public:
FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset,
std::vector<std::shared_ptr<Scope>> *local_scopes);
std::vector<Scope *> *local_scopes);
~FetchOpHandle();
......@@ -47,7 +47,7 @@ struct FetchOpHandle : public OpHandleBase {
private:
FeedFetchList *data_;
size_t offset_;
std::vector<std::shared_ptr<Scope>> *local_scopes_;
std::vector<Scope *> *local_scopes_;
std::vector<LoDTensor> tensors_;
};
......
......@@ -23,8 +23,7 @@ namespace paddle {
namespace framework {
namespace details {
ScopeBufferedSSAGraphExecutor::ScopeBufferedSSAGraphExecutor(
ExecutionStrategy strategy,
std::vector<std::shared_ptr<Scope>> local_scopes,
ExecutionStrategy strategy, std::vector<Scope *> local_scopes,
std::vector<VariableInfo> var_infos, std::vector<platform::Place> places,
std::unique_ptr<SSAGraphExecutor> &&underlying_executor)
: strategy_(std::move(strategy)),
......
......@@ -37,8 +37,7 @@ struct VariableInfo {
class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor {
public:
ScopeBufferedSSAGraphExecutor(
ExecutionStrategy strategy,
std::vector<std::shared_ptr<Scope>> local_scopes,
ExecutionStrategy strategy, std::vector<Scope*> local_scopes,
std::vector<VariableInfo> var_infos, std::vector<platform::Place> places,
std::unique_ptr<SSAGraphExecutor>&& underlying_executor);
......@@ -53,7 +52,7 @@ class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor {
ExecutionStrategy strategy_;
std::unique_ptr<SSAGraphExecutor> underlying_executor_;
std::vector<std::shared_ptr<Scope>> local_scopes_;
std::vector<Scope*> local_scopes_;
std::vector<VariableInfo> var_infos_;
std::vector<platform::Place> places_;
};
......
......@@ -21,8 +21,7 @@ namespace paddle {
namespace framework {
namespace details {
ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor(
const ExecutionStrategy &strategy,
const std::vector<std::shared_ptr<Scope>> &local_scopes,
const ExecutionStrategy &strategy, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
std::unique_ptr<ir::Graph> &&graph)
: graph_(std::move(graph)),
......
......@@ -38,9 +38,8 @@ namespace details {
class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
public:
ThreadedSSAGraphExecutor(
const ExecutionStrategy &strategy,
const std::vector<std::shared_ptr<Scope>> &local_scopes,
ThreadedSSAGraphExecutor(const ExecutionStrategy &strategy,
const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
std::unique_ptr<ir::Graph> &&graph);
......@@ -58,7 +57,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
private:
std::unique_ptr<ir::Graph> graph_;
std::unique_ptr<::ThreadPool> pool_;
std::vector<std::shared_ptr<Scope>> local_scopes_;
std::vector<Scope *> local_scopes_;
std::vector<platform::Place> places_;
platform::DeviceContextPool fetch_ctxs_;
ExceptionHolder exception_holder_;
......
......@@ -39,8 +39,7 @@ std::unique_ptr<ir::Graph> ApplyParallelExecutorPass(
const ProgramDesc &main_program, const std::vector<platform::Place> &places,
const std::string &loss_var_name,
const std::unordered_set<std::string> &param_names,
const std::vector<std::shared_ptr<Scope>> &local_scopes,
const bool use_cuda,
const std::vector<Scope *> &local_scopes, const bool use_cuda,
#ifdef PADDLE_WITH_CUDA
const BuildStrategy &strategy, platform::NCCLContextMap *nccl_ctxs) {
#else
......@@ -67,8 +66,8 @@ std::unique_ptr<ir::Graph> ApplyParallelExecutorPass(
&loss_var_name);
multi_devices_pass->SetNotOwned<const std::unordered_set<std::string>>(
"params", &param_names);
multi_devices_pass->SetNotOwned<const std::vector<std::shared_ptr<Scope>>>(
"local_scopes", &local_scopes);
multi_devices_pass->SetNotOwned<const std::vector<Scope *>>("local_scopes",
&local_scopes);
multi_devices_pass->SetNotOwned<const BuildStrategy>("strategy", &strategy);
#ifdef PADDLE_WITH_CUDA
......@@ -101,8 +100,8 @@ class ParallelExecutorPrivate {
: places_(places) {}
std::vector<platform::Place> places_;
std::vector<std::shared_ptr<Scope>> local_scopes_;
std::shared_ptr<Scope> global_scope_;
std::vector<Scope *> local_scopes_;
Scope *global_scope_;
std::unique_ptr<details::SSAGraphExecutor> executor_;
#ifdef PADDLE_WITH_CUDA
......@@ -113,7 +112,7 @@ class ParallelExecutorPrivate {
bool use_all_reduce_;
};
std::vector<std::shared_ptr<Scope>> &ParallelExecutor::GetLocalScopes() {
std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
return member_->local_scopes_;
}
......@@ -122,8 +121,7 @@ ParallelExecutor::ParallelExecutor(
const std::unordered_set<std::string> &params,
const std::unordered_set<std::string> &bcast_vars,
const ProgramDesc &main_program, const std::string &loss_var_name,
const std::shared_ptr<Scope> &scope,
const std::vector<std::shared_ptr<Scope>> &local_scopes,
Scope *scope, const std::vector<Scope *> &local_scopes,
const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy,
size_t num_trainers, size_t trainer_id)
: member_(new ParallelExecutorPrivate(places)) {
......@@ -144,13 +142,13 @@ ParallelExecutor::ParallelExecutor(
member_->own_local_scope_ = true;
member_->local_scopes_.emplace_back(member_->global_scope_);
for (size_t i = 1; i < member_->places_.size(); ++i) {
member_->local_scopes_.emplace_back(scope->NewSharedScope());
member_->local_scopes_.emplace_back(&scope->NewScope());
}
} else {
member_->own_local_scope_ = false;
PADDLE_ENFORCE_EQ(member_->places_.size(), local_scopes.size());
for (size_t i = 0; i < member_->places_.size(); ++i) {
member_->local_scopes_.emplace_back(local_scopes[i]->NewSharedScope());
member_->local_scopes_.emplace_back(&local_scopes[i]->NewScope());
}
}
......@@ -323,7 +321,7 @@ void ParallelExecutor::FeedTensorsIntoLocalScopes(
for (size_t i = 0; i < tensors.size(); ++i) {
auto &map = tensors[i];
auto &scope = member_->local_scopes_[i];
auto *scope = member_->local_scopes_[i];
for (auto &pair : map) {
auto *trg = scope->Var(pair.first)->GetMutable<LoDTensor>();
trg->ShareDataWith(pair.second);
......@@ -353,15 +351,11 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes(
ParallelExecutor::~ParallelExecutor() {
if (member_->own_local_scope_) {
std::vector<Scope *> local_scopes_ptrs;
local_scopes_ptrs.reserve(member_->local_scopes_.size());
for (size_t i = 1; i < member_->local_scopes_.size(); ++i) {
local_scopes_ptrs.emplace_back(member_->local_scopes_[i].get());
member_->local_scopes_[i].reset();
Scope *local_scope = member_->local_scopes_[i];
if (member_->global_scope_->HasKid(local_scope)) {
member_->global_scope_->DeleteScope(local_scope);
}
for (size_t i = 0; i != local_scopes_ptrs.size(); ++i) {
member_->global_scope_->DeleteScope(local_scopes_ptrs[i]);
}
}
}
......
......@@ -39,20 +39,19 @@ class ParallelExecutor {
DISABLE_COPY_AND_ASSIGN(ParallelExecutor);
public:
explicit ParallelExecutor(
const std::vector<platform::Place> &places,
explicit ParallelExecutor(const std::vector<platform::Place> &places,
const std::unordered_set<std::string> &params,
const std::unordered_set<std::string> &bcast_vars,
const ProgramDesc &main_program, const std::string &loss_var_name,
const std::shared_ptr<Scope> &scope,
const std::vector<std::shared_ptr<Scope>> &local_scopes,
const ProgramDesc &main_program,
const std::string &loss_var_name, Scope *scope,
const std::vector<Scope *> &local_scopes,
const ExecutionStrategy &exec_strategy,
const BuildStrategy &build_strategy, size_t num_trainers = 1,
size_t trainer_id = 0);
const BuildStrategy &build_strategy,
size_t num_trainers = 1, size_t trainer_id = 0);
~ParallelExecutor();
std::vector<std::shared_ptr<Scope>> &GetLocalScopes();
std::vector<Scope *> &GetLocalScopes();
/**
* Feed tensors to local scopes. The size of tensors should be equal to the
......
......@@ -38,8 +38,8 @@ Scope::~Scope() { DropKids(); }
Scope& Scope::NewScope() const {
std::unique_lock<std::mutex> lock(mutex_);
kids_.push_back(std::shared_ptr<Scope>(new Scope(this)));
return kids_.back().get();
kids_.push_back(new Scope(this));
return *kids_.back();
}
Variable* Scope::Var(const std::string& name) {
......@@ -68,9 +68,16 @@ const Scope* Scope::FindScope(const Variable* var) const {
void Scope::DropKids() {
std::unique_lock<std::mutex> lock(mutex_);
for (Scope* s : kids_) delete s;
kids_.clear();
}
bool Scope::HasKid(const Scope* scope) const {
std::unique_lock<std::mutex> lock(mutex_);
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
return it != this->kids_.end();
}
std::vector<std::string> Scope::LocalVarNames() const {
std::unique_lock<std::mutex> lock(mutex_);
std::vector<std::string> known_vars;
......@@ -83,12 +90,8 @@ std::vector<std::string> Scope::LocalVarNames() const {
void Scope::DeleteScope(Scope* scope) const {
std::unique_lock<std::mutex> lock(mutex_);
auto it = std::find_if(this->kids_.begin(), this->kids_.end(),
[&scope](const std::shared_ptr<Scope>& kid) {
return kid.get() == scope;
});
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
it->reset();
this->kids_.erase(it);
// When making memory benchmark on Fluid, we have to delete scope sync.
if (FLAGS_benchmark || FLAGS_eager_delete_scope) {
......
......@@ -71,6 +71,9 @@ class Scope {
/// Drop all kids scopes belonged to this scope.
void DropKids();
/// Find if a scope exists in the kid scopes
bool HasKid(const Scope* scope) const;
// enumerate all the variables current contains.
std::vector<std::string> LocalVarNames() const;
......@@ -105,7 +108,7 @@ class Scope {
Variable* FindVarLocally(const std::string& name) const;
// Scope in `kids_` are owned by this class.
mutable std::list<std::shared_ptr<Scope>> kids_;
mutable std::list<Scope*> kids_;
Scope const* parent_{nullptr};
DISABLE_COPY_AND_ASSIGN(Scope);
......
......@@ -178,7 +178,4 @@ if __name__ == '__main__':
for parallel in (False, True):
if use_cuda and not core.is_compiled_with_cuda():
continue
# TODO(minqiyang): remove this line after fixing the deletion
# order problem of Scope in ParallelExecutor in manylinux
if six.PY2:
main(use_cuda=use_cuda, parallel=parallel)
......@@ -152,7 +152,4 @@ if __name__ == '__main__':
for parallel in (False, True):
if use_cuda and not core.is_compiled_with_cuda():
continue
# TODO(minqiyang): remove this line after fixing the deletion
# order problem of Scope in ParallelExecutor in manylinux
if six.PY2:
main(use_cuda=use_cuda, parallel=parallel)
......@@ -155,7 +155,4 @@ if __name__ == '__main__':
for parallel in (False, True):
if use_cuda and not core.is_compiled_with_cuda():
continue
# TODO(minqiyang): remove this line after fixing the deletion
# order problem of Scope in ParallelExecutor in manylinux
if six.PY2:
main(use_cuda=use_cuda, parallel=parallel)
......@@ -137,7 +137,4 @@ if __name__ == '__main__':
for parallel in (False, True):
if use_cuda and not core.is_compiled_with_cuda():
continue
# TODO(minqiyang): remove this line after fixing the deletion
# order problem of Scope in ParallelExecutor in manylinux
if six.PY2:
main(use_cuda=use_cuda, parallel=parallel)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册