diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 1847a4dfa51111f9922ef45b4e9bd443f2758a26..d3122353aff7b1c5a5db779a9d81a7d915f74cfa 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -813,7 +813,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, fetch_ops.clear(); *member_->global_scope_->Var(fetched_var_name)->GetMutable() = fetched_data->tensors_; - + VLOG(3) << "Before Wait"; // FIXME: // It could be optimized by using multiple events in an operator. // Manually sync computation during iter. @@ -824,6 +824,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, for (auto &p : member_->places_) { platform::DeviceContextPool::Instance().Get(p)->Wait(); } + VLOG(3) << "Done wait"; } void ParallelExecutor::RunOp(