diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index 9fbefabc841e3f6940860f60d959fee97495e4c9..d09b94a3fd32952985a37cf4246c7640d2db4f56 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -64,7 +64,8 @@ class OpHandleBase { virtual bool IsMultiDeviceTransfer() { return false; } const platform::DeviceContext *DeviceContext(platform::Place place) { - return dev_ctxes_[place]; + auto it = dev_ctxes_.find(place); + return it != dev_ctxes_.end() ? it->second : nullptr; } void SetDeviceContext(platform::Place place, platform::DeviceContext *ctx_) { diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 70ec6e90a4d0106b7f838e51b8357798daa4b10d..4576999c8ec756ee662ed91df198e1a96f9897fc 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -46,6 +46,41 @@ ExecutorPrepareContext::~ExecutorPrepareContext() { VLOG(5) << "destroy ExecutorPrepareContext"; } +template +static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op, + GarbageCollector* gc, + RefCntMap* ref_cnts) { + std::unordered_set erase_tensors; + + auto handler = [&](const VariableNameMap& name_map) { + for (auto& name_pair : name_map) { + for (auto& name : name_pair.second) { + auto it = ref_cnts->find(name); + if (it == ref_cnts->end()) continue; + if ((it->second)-- == 1) { + auto* var = scope.FindVar(name); + if (var != nullptr) { + VLOG(10) << "Erase tensor \'" << name << "\'"; + if (var->IsType()) { + erase_tensors.insert(var->GetMutable()); + } else if (var->IsType()) { + erase_tensors.insert( + var->GetMutable()->mutable_value()); + } + } + } + } + } + }; + + handler(op->Inputs()); + handler(op->Outputs()); + + if (!erase_tensors.empty()) { + gc->Add(erase_tensors); + } +} + Executor::Executor(const platform::Place& place) : place_(place) {} void Executor::Close() { @@ -331,9 +366,13 @@ 
void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, } int64_t max_memory_size = GetEagerDeletionThreshold(); - std::unique_ptr> gc; - if (max_memory_size >= 0) { + // WhileOp would set keep_kids to true + // WhileGradOp would need the scopes created in WhileOp + // Perhaps, we should not perform eager deletion in WhileOp + // The scopes and variables created by WhileOp would be deleted + // in WhileGradOp. + if (max_memory_size >= 0 && !keep_kids) { ctx->ResetReferenceCount(); #ifdef PADDLE_WITH_CUDA if (platform::is_gpu_place(place_)) { @@ -352,45 +391,8 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, op->Run(*local_scope, place_); if (gc != nullptr) { - std::vector erase_vars; - for (auto& input : op->Inputs()) { - for (auto& input_name : input.second) { - auto it = ctx->cur_ref_cnts_.find(input_name); - if (it == ctx->cur_ref_cnts_.end()) continue; - if (it->second == 1) { // should delete it - erase_vars.emplace_back(input_name); - ctx->cur_ref_cnts_.erase(input_name); - } else { - --(it->second); - } - } - } - - for (auto& output : op->Outputs()) { - for (auto& output_name : output.second) { - auto it = ctx->cur_ref_cnts_.find(output_name); - if (it == ctx->cur_ref_cnts_.end()) continue; - if (it->second == 1) { - erase_vars.emplace_back(output_name); - ctx->cur_ref_cnts_.erase(output_name); - } else { - --(it->second); - } - } - } - - if (!erase_vars.empty()) { - std::vector erase_tensors; - for (auto& name : erase_vars) { - auto* var = local_scope->FindVar(name); - if (var == nullptr) continue; - if (var->IsType()) { - auto* tensor = var->GetMutable(); - erase_tensors.push_back(tensor); - } - } - if (!erase_tensors.empty()) gc->Add(erase_tensors); - } + DeleteUnusedTensors(*local_scope, op.get(), gc.get(), + &(ctx->cur_ref_cnts_)); } if (FLAGS_benchmark) { diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h index 
f0cc1338a8af50030a70a9797cbcd1b0567272b5..36b36d49c2728dbef93042158dffa26d8f56d529 100644 --- a/paddle/fluid/framework/executor.h +++ b/paddle/fluid/framework/executor.h @@ -32,38 +32,32 @@ template std::unordered_map GetNonPersistableReferenceCount( const ProgramDesc& prog, size_t block_id) { auto& block = prog.Block(block_id); - std::unordered_set ignored_vars; std::unordered_map ref_cnts; - for (auto var_desc : block.AllVars()) { - auto type = var_desc->Proto()->type().type(); - if (type != proto::VarType::LOD_TENSOR || var_desc->Persistable()) { - ignored_vars.insert(var_desc->Name()); // ignore persistable vars - } - } - - for (auto op_desc : block.AllOps()) { - for (auto& input : op_desc->Inputs()) { - for (auto& input_name : input.second) { - if (!ignored_vars.count(input_name)) { - if (ref_cnts.count(input_name)) - ++ref_cnts[input_name]; - else - ref_cnts[input_name] = 1; + auto update_ref_cnts = [&](OpDesc* op_desc, const VariableNameMap& name_map) { + for (auto& name_pair : name_map) { + for (auto& name : name_pair.second) { + auto* var_desc = block.FindVar(name); + if (var_desc == nullptr || var_desc->Persistable()) continue; + auto type = var_desc->Proto()->type().type(); + if (type != proto::VarType::LOD_TENSOR && + type != proto::VarType::SELECTED_ROWS) { + continue; } - } - } - for (auto& output : op_desc->Outputs()) { - for (auto output_name : output.second) { - if (!ignored_vars.count(output_name)) { - if (ref_cnts.count(output_name)) - ++ref_cnts[output_name]; - else - ref_cnts[output_name] = 1; + auto it = ref_cnts.find(name); + if (it != ref_cnts.end()) { + ++it->second; + } else { + ref_cnts[name] = 1; } } } + }; + + for (auto op_desc : block.AllOps()) { + update_ref_cnts(op_desc, op_desc->Inputs()); + update_ref_cnts(op_desc, op_desc->Outputs()); } return ref_cnts; } diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 
f06bad6c78c05804e583f859906b88fb7b500372..8d2e66009c280426f795290fbbbf96a33c1aa637 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -64,6 +64,8 @@ ParallelExecutor::ParallelExecutor( const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy, size_t num_trainers, size_t trainer_id) : member_(new ParallelExecutorPrivate(places)) { + is_alive_.test_and_set(); + member_->global_scope_ = scope; member_->use_cuda_ = exec_strategy.use_cuda_; member_->use_all_reduce_ = @@ -246,6 +248,15 @@ void ParallelExecutor::BCastParamsToDevices( void ParallelExecutor::Run(const std::vector &fetch_tensors, const std::string &fetched_var_name) { + // If ParallelExecutor has been destructed + // just return + if (!is_alive_.test_and_set()) return; + + // If ParallelExecutor is running + if (is_running_.test_and_set()) { + PADDLE_THROW("The previous ParallelExecutor::Run() has not stopped"); + } + platform::RecordBlock b(0); #ifdef PADDLE_WITH_CUDA if (!gcs_.empty()) { @@ -259,9 +270,17 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } } #endif - auto fetch_data = member_->executor_->Run(fetch_tensors); - *member_->global_scope_->Var(fetched_var_name)->GetMutable() = - fetch_data; + try { + auto fetch_data = member_->executor_->Run(fetch_tensors); + *member_->global_scope_->Var(fetched_var_name) + ->GetMutable() = fetch_data; + is_running_.clear(); + } catch (...) 
{ + is_running_.clear(); + if (is_alive_.test_and_set()) { + std::rethrow_exception(std::current_exception()); + } + } } void ParallelExecutor::FeedTensorsIntoLocalScopes( @@ -299,6 +318,7 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes( } ParallelExecutor::~ParallelExecutor() { + is_alive_.clear(); if (member_->own_local_scope_) { for (size_t i = 1; i < member_->local_scopes_.size(); ++i) { Scope *local_scope = member_->local_scopes_[i]; @@ -307,6 +327,12 @@ ParallelExecutor::~ParallelExecutor() { } } } + + while (is_running_.test_and_set()) { + // wait until all threads have been stopped + } + + member_.reset(); } } // namespace framework diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index fd386a5987f11ff64964e95eb7e9b83572dc790c..b78f7173752bd33e78c9d82080c72da7066facd6 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -75,7 +75,20 @@ class ParallelExecutor { private: void BCastParamsToDevices(const std::unordered_set &vars) const; - ParallelExecutorPrivate *member_; + std::unique_ptr member_; + + // FIXME(zjl): HOT-FIX + // A flag to indicate whether ParallelExecutor is destructed. + // In Python side, when users interrupt the process manually, such as + // keyboard interrupt, ParallelExecutor may be destructed before Run() ends. + // Thus, disturbing exception messages would occur when interrupted. + // If is_alive_ is false, we would discard the last exception thrown by Run(). + // Since std::atomic_flag is always lock-free and faster than + // std::atomic, we choose std::atomic_flag to be the flag here. + std::atomic_flag is_alive_ = ATOMIC_FLAG_INIT; + + // A flag to indicate whether ParallelExecutor is running. 
+ std::atomic_flag is_running_ = ATOMIC_FLAG_INIT; #ifdef PADDLE_WITH_CUDA // ref_cnts_ is only initialized when ParallelExecutor constructs, and then diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index 1a727a2c8c759d010606d5b605823b7252b35c69..a4abd1b1283f08fb8431fbeea0cea17c8439fdd7 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -49,18 +49,18 @@ int64_t GetEagerDeletionThreshold() { Scope::~Scope() { DropKids(); } Scope& Scope::NewScope() const { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); kids_.push_back(new Scope(this)); return *kids_.back(); } Variable* Scope::Var(const std::string& name) { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); return VarInternal(name); } Variable* Scope::Var(std::string* name) { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); auto new_name = string::Sprintf("%p.%d", this, vars_.size()); if (name != nullptr) { *name = new_name; @@ -69,29 +69,34 @@ Variable* Scope::Var(std::string* name) { } Variable* Scope::FindVar(const std::string& name) const { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); return FindVarInternal(name); } +Variable* Scope::FindLocalVar(const std::string& name) const { + std::lock_guard lock(mutex_); + return FindVarLocally(name); +} + const Scope* Scope::FindScope(const Variable* var) const { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); return FindScopeInternal(var); } void Scope::DropKids() { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); for (Scope* s : kids_) delete s; kids_.clear(); } bool Scope::HasKid(const Scope* scope) const { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); return it != this->kids_.end(); } std::vector Scope::LocalVarNames() const { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); std::vector known_vars; 
known_vars.reserve(this->vars_.size()); for (auto& p : vars_) { @@ -101,7 +106,7 @@ std::vector Scope::LocalVarNames() const { } void Scope::DeleteScope(Scope* scope) const { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); this->kids_.erase(it); @@ -114,7 +119,7 @@ void Scope::DeleteScope(Scope* scope) const { } void Scope::EraseVars(const std::vector& var_names) { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); std::set var_set(var_names.begin(), var_names.end()); for (auto it = vars_.begin(); it != vars_.end();) { if (var_set.find(it->first) != var_set.end()) { @@ -127,12 +132,12 @@ void Scope::EraseVars(const std::vector& var_names) { void Scope::Rename(const std::string& origin_name, const std::string& new_name) const { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); RenameInternal(origin_name, new_name); } std::string Scope::Rename(const std::string& origin_name) const { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); auto new_name = string::Sprintf("%p.%d", this, vars_.size()); RenameInternal(origin_name, new_name); return new_name; diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index e42fff1d79d92fb7ed61768a614d8cd98f6775a0..14f9f36812d690fc4a7440f2e7e6a85e9993a535 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -63,6 +63,11 @@ class Scope { /// Caller doesn't own the returned Variable. Variable* FindVar(const std::string& name) const; + /// Find a variable in the current scope. + /// Return nullptr if cannot find. + /// Caller doesn't own the returned Variable. + Variable* FindLocalVar(const std::string& name) const; + const Scope* parent() const { return parent_; } /// Find the scope or an ancestor scope that contains the given variable.