diff --git a/paddle/fluid/operators/collective/c_comm_init_hccl_op.cc b/paddle/fluid/operators/collective/c_comm_init_hccl_op.cc index 3df0595525941a93b0fb4a63014021ad519651cf..7dec645b5b3ad8adbe27c1bbed43ab97dab8c4c3 100644 --- a/paddle/fluid/operators/collective/c_comm_init_hccl_op.cc +++ b/paddle/fluid/operators/collective/c_comm_init_hccl_op.cc @@ -87,6 +87,8 @@ class CCommInitOpAscend : public framework::OperatorBase { } PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclBroadcast( buff, size, HCCL_DATA_TYPE_FP32, 0, comm->comm(), stream)); + // Synchronize stream to find hccl error in time. + PADDLE_ENFORCE_NPU_SUCCESS(aclrtSynchronizeStream(stream)); VLOG(3) << "Build connection successful."; #else PADDLE_THROW(platform::errors::PreconditionNotMet(