diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 25f8d7afdec1a4be68793f9d02bac19e36a4907a..66ad3f33d94f06afb06d7ad2ef156fc1697e231b 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -690,6 +690,10 @@ void ParallelExecutor::BCastParamsToGPUs( } platform::dynload::ncclGroupEnd(); } + + for (auto &stream : member_->communication_streams_) { + stream.second.ctx_->Wait(); + } } #else PADDLE_THROW("Not compiled with CUDA");