diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc index 59a0aef480bfe1b1e63e8fb2344c5e22390a8f1b..6bca299813f166009bc33512e2154907d869cf56 100644 --- a/paddle/fluid/framework/details/all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc @@ -109,7 +109,7 @@ void AllReduceOpHandle::RunImpl() { buffer, buffer, numel, static_cast(dtype), ncclSum, comm, stream)); // TODO(Yancey1989): synchronize here can get better performance - // if don't use NCCL group call, but need more profileing. + // if don't use NCCL group call, but need more profiling. if (local_scopes_.size() == 1UL) cudaStreamSynchronize(stream); }); }