From d3ed070e10abffd4e8315f42f3090be9a38c54b7 Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Mon, 15 Oct 2018 07:27:16 +0000 Subject: [PATCH] test=develop --- paddle/fluid/framework/parallel_executor.cc | 32 ++++----------------- paddle/fluid/framework/parallel_executor.h | 13 --------- 2 files changed, 5 insertions(+), 40 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 8d2e66009c2..e8adabd2654 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -64,8 +64,6 @@ ParallelExecutor::ParallelExecutor( const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy, size_t num_trainers, size_t trainer_id) : member_(new ParallelExecutorPrivate(places)) { - is_alive_.test_and_set(); - member_->global_scope_ = scope; member_->use_cuda_ = exec_strategy.use_cuda_; member_->use_all_reduce_ = @@ -248,15 +246,6 @@ void ParallelExecutor::BCastParamsToDevices( void ParallelExecutor::Run(const std::vector &fetch_tensors, const std::string &fetched_var_name) { - // If ParallelExecutor has been destructed - // just return - if (!is_alive_.test_and_set()) return; - - // If ParallelExecutor is running - if (is_running_.test_and_set()) { - PADDLE_THROW("The previous ParallelExecutor::Run() has not stopped"); - } - platform::RecordBlock b(0); #ifdef PADDLE_WITH_CUDA if (!gcs_.empty()) { @@ -270,17 +259,9 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } } #endif - try { - auto fetch_data = member_->executor_->Run(fetch_tensors); - *member_->global_scope_->Var(fetched_var_name) - ->GetMutable() = fetch_data; - is_running_.clear(); - } catch (...) { - is_running_.clear(); - if (is_alive_.test_and_set()) { - std::rethrow_exception(std::current_exception()); - } - } + auto fetch_data = member_->executor_->Run(fetch_tensors); + *member_->global_scope_->Var(fetched_var_name)->GetMutable() = + fetch_data; } void ParallelExecutor::FeedTensorsIntoLocalScopes( @@ -318,7 +299,6 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes( } ParallelExecutor::~ParallelExecutor() { - is_alive_.clear(); if (member_->own_local_scope_) { for (size_t i = 1; i < member_->local_scopes_.size(); ++i) { Scope *local_scope = member_->local_scopes_[i]; @@ -328,10 +308,8 @@ ParallelExecutor::~ParallelExecutor() { } } - while (is_running_.test_and_set()) { - // wait unitl all threads have been stopped - } - + // member_ must be destructed before gcs_ since the destructor of + // ReferenceCountOpHandle use raw pointers of gcs_ inside. member_.reset(); } diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index b78f7173752..ef09b98b2aa 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -77,19 +77,6 @@ class ParallelExecutor { std::unique_ptr member_; - // FIXME(zjl): HOT-FIX - // A flag to indicate whether ParallelExecutor is destructed. - // In Python side, when users interrupt the process manually, such as - // keyboard interrupt, ParallelExecutor may be destructed before Run() ends. - // Thus, disturbing exception messages would occur when interrupted. - // If is_alive_ is false, we would discard the last exception thrown by Run(). - // Since std::atomic_flag is always lock-free and faster than - // std::atomic, we choose std::atomic_flag to be the flag here. - std::atomic_flag is_alive_ = ATOMIC_FLAG_INIT; - - // A flag to indicate whether ParallelExecutor is running. - std::atomic_flag is_running_ = ATOMIC_FLAG_INIT; - #ifdef PADDLE_WITH_CUDA // ref_cnts_ is only initialized when ParallelExecutor constructs, and then // keeps unchanged -- GitLab