diff --git a/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc b/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc
index bb1f415128e58cc166a78e069fbf3a84c60951e6..128aaa33a2c60e62fdca13768cdc0a815167f3ef 100644
--- a/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc
@@ -34,7 +34,7 @@ ParallelSSAGraphExecutor::ParallelSSAGraphExecutor(
           ? 1UL
           : strategy_.num_threads_ / places_.size();
   VLOG(1) << "set num_threads: " << strategy_.num_threads_
-          << " to schedule operators on each device.";
+          << " to run the operators of the graph on each device.";
   for (size_t i = 0; i < places.size(); ++i) {
     executors_.emplace_back(new details::ThreadedSSAGraphExecutor(
         strategy_, {local_scopes_[i]}, {places_[i]}, std::move(graphs_[i])));
@@ -45,10 +45,10 @@ FeedFetchList ParallelSSAGraphExecutor::Run(
     const std::vector<std::string> &fetch_tensors) {
   std::vector<std::future<FeedFetchList>> run_futures;
 
-  std::vector<FeedFetchList> fetch_datas;
+  std::vector<FeedFetchList> fetch_data;
   FeedFetchList ret;
-  fetch_datas.reserve(places_.size());
+  fetch_data.reserve(places_.size());
   ret.reserve(fetch_tensors.size());
 
   exception_holder_.Clear();
@@ -65,7 +65,7 @@ FeedFetchList ParallelSSAGraphExecutor::Run(
     if (pool_) {
       run_futures.emplace_back(pool_->enqueue(std::move(call)));
     } else {
-      fetch_datas.emplace_back(std::move(call()));
+      fetch_data.emplace_back(std::move(call()));
     }
   }
 
@@ -74,7 +74,7 @@ FeedFetchList ParallelSSAGraphExecutor::Run(
       if (exception_holder_.IsCaught()) {
        f.wait();
       } else {
-        fetch_datas.emplace_back(std::move(f.get()));
+        fetch_data.emplace_back(std::move(f.get()));
       }
     }
   }
@@ -86,7 +86,7 @@ FeedFetchList ParallelSSAGraphExecutor::Run(
     std::vector<const LoDTensor *> lodtensor_ptrs;
     lodtensor_ptrs.reserve(local_scopes_.size());
     for (size_t scope_idx = 0; scope_idx < local_scopes_.size(); ++scope_idx) {
-      lodtensor_ptrs.push_back(&fetch_datas.at(scope_idx).at(fetch_idx));
+      lodtensor_ptrs.push_back(&fetch_data.at(scope_idx).at(fetch_idx));
     }
     ret.emplace_back();
     ret.back().MergeLoDTensor(lodtensor_ptrs, platform::CPUPlace());
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index 176c1db349c09bea8974db8428c2c7f905932ba6..5a3f5e9e695559d37732dd3d255316294d21d90c 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -469,8 +469,9 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes(
 bool ParallelExecutor::EnableParallelGraphExecution(
     const ProgramDesc &main_program, const ExecutionStrategy &exec_strategy,
     const BuildStrategy &build_strategy) const {
-  bool enable_parallel_graph = true;
+  if (!FLAGS_enable_parallel_graph) return false;
+
+  bool enable_parallel_graph = true;
   // TODO(Yancey1989): support sparse update in ParallelGraph mode.
   for (auto &var_desc : main_program.Block(0).AllVars()) {
     if (var_desc->GetType() == proto::VarType::SELECTED_ROWS) {
@@ -492,7 +493,7 @@ bool ParallelExecutor::EnableParallelGraphExecution(
   if (build_strategy.enable_sequential_execution_ ||
       exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental)
     enable_parallel_graph = false;
-  return enable_parallel_graph && FLAGS_enable_parallel_graph;
+  return enable_parallel_graph;
 }
 
 ParallelExecutor::~ParallelExecutor() {