diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 6408ecdd376494cc9988666f12037f62ce4e183c..07dfddfa305fa5eab8a29f10bc6d6362a6a8e826 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -810,19 +810,13 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } } - fetch_ops.clear(); - *member_->global_scope_->Var(fetched_var_name)->GetMutable() = - fetched_data->tensors_; - // FIXME: - // It could be optimized by using multiple events in an operator. - // Manually sync computation during iter. - for (auto &s : member_->communication_streams_) { - s.second.ctx_->Wait(); - } - for (auto &p : member_->places_) { platform::DeviceContextPool::Instance().Get(p)->Wait(); } + + fetch_ops.clear(); + *member_->global_scope_->Var(fetched_var_name)->GetMutable() = + fetched_data->tensors_; } void ParallelExecutor::RunOp(