diff --git a/paddle/pten/kernels/hybird/cpu/elementwise.h b/paddle/pten/kernels/hybird/cpu/elementwise.h
index e8213c8b45ddb1964f6861040fb2ec480cde950c..d503957a7626203aa930eba62bf4fee6d5b0cdb1 100644
--- a/paddle/pten/kernels/hybird/cpu/elementwise.h
+++ b/paddle/pten/kernels/hybird/cpu/elementwise.h
@@ -130,16 +130,15 @@ void CommonElementwiseBroadcastForward(
       is_xsize_larger);
 }
 
-// It is a common implementation to compute binary calculation with the support
-// of broadcast, supporting both CPU and GPU.
-// - CPU implementation cannot support the case when x needs broadcast, thus
-//   this function need to be called with XxxFunctor and XxxInverseFunctor,
-//   like paddle/fluid/operators/elementwise/elementwise_add_op.h#L49 - L55.
-// - GPU implementation supports all the broadcast cases, thus there is no need
-//   to define and call with XxxInverseFunctor.
+// It is a common CPU implementation to compute binary calculations with
+// broadcast support. Note:
+// 1. The CPU implementation cannot support the case when x needs broadcast,
+//    so this function must be called with both XxxFunctor and
+//    XxxInverseFunctor, such as AddFunctor and InverseAddFunctor.
+// 2. The corresponding GPU implementation supports all broadcast cases, so
+//    there is no need to define and call with XxxInverseFunctor.
 // TODO(liuyiqun): optimize the CPU implementation to support all broadcast
 // cases and avoid the need of XxxInverseFunctor.
-
 template <typename Functor, typename T, typename OutType = T>
 void ElementwiseCompute(const paddle::platform::CPUDeviceContext &dev_ctx,
                         const DenseTensor &x,
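
For reviewers unfamiliar with the XxxFunctor / XxxInverseFunctor convention the new comment describes, here is a minimal sketch of the pattern, modeled on AddFunctor / InverseAddFunctor in paddle/fluid/operators/elementwise/elementwise_add_op.h. The dispatch helper (`AddExample`) and its exact template arguments are illustrative assumptions, not the real Paddle call site:

```cpp
// Sketch only: assumes Paddle's HOSTDEVICE macro, DenseTensor, and the
// ElementwiseCompute declared in this file are in scope.
template <typename T>
struct AddFunctor {
  inline HOSTDEVICE T operator()(T a, T b) const { return a + b; }
};

// The inverse functor applies the same operation with the operands swapped,
// so the CPU path can always treat the second argument as the broadcast one.
template <typename T>
struct InverseAddFunctor {
  inline HOSTDEVICE T operator()(T a, T b) const { return b + a; }
};

// Hypothetical caller mirroring elementwise_add_op.h: when x needs broadcast
// (y has more dimensions), switch to the inverse functor instead of swapping
// the tensors.
template <typename T>
void AddExample(const paddle::platform::CPUDeviceContext &dev_ctx,
                const DenseTensor &x,
                const DenseTensor &y,
                int axis,
                DenseTensor *out) {
  if (x.dims().size() >= y.dims().size()) {
    ElementwiseCompute<AddFunctor<T>, T>(
        dev_ctx, x, y, axis, AddFunctor<T>(), out);
  } else {
    // x needs broadcast: invert the functor rather than reordering tensors.
    ElementwiseCompute<InverseAddFunctor<T>, T>(
        dev_ctx, x, y, axis, InverseAddFunctor<T>(), out);
  }
}
```

For a commutative op like add the two functors happen to coincide, but for sub or div the inverse variant is what keeps the operand order correct once the kernel internally iterates over the larger tensor; this asymmetry is exactly what the TODO aims to remove.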