diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 31a49575f19fa55bc674b734a99c6a07bc040b3e..d3e846d10d2198832149e13c5e61bccef681e623 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -365,8 +365,6 @@ struct NCCLAllReduceOpHandle : public OpHandle { auto &p = static_cast(in)->place_; in->generated_op_->Wait(dev_ctx_[p]); } - VLOG(3) << "Before NCCL"; - PADDLE_ENFORCE(cudaDeviceSynchronize()); auto &var_name = static_cast(this->inputs_[0])->name_; int dtype = -1; @@ -395,7 +393,6 @@ struct NCCLAllReduceOpHandle : public OpHandle { nccl_ctx.comm, nccl_ctx.stream()); } platform::dynload::ncclGroupEnd(); - PADDLE_ENFORCE(cudaDeviceSynchronize()); } } };