[PHI]Move elementwise div/mul of XPU kernel to PHI (#45581)

* move elementwise test=kunlun
* move add/sub/mul/div kernel to elementwise_kernel, test=kunlun
* fix ci bugs,test=kunlun
* fix ci bugs
* test=kunlun

[PHI]Move elementwise div/mul of XPU kernel to PHI (#45581)
* move elementwise test=kunlun
* move add/sub/mul/div kernel to elementwise_kernel, test=kunlun
* fix ci bugs,test=kunlun
* fix ci bugs
* test=kunlun
f41b8566 · YuanRisheng · GitHub · ef5344cb · ef5344cb · ef5344cb
18 changed files
--- a/paddle/fluid/operators/elementwise/elementwise_div_op_xpu.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_div_op_xpu.cc
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op.h"
-#include "paddle/fluid/operators/elementwise/elementwise_xpu.h"
-namespace paddle {
-namespace operators {
-template <typename T>
-class ElementwiseDivXPUKernel : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    XPUElementwise<T, XPUType>(ctx, xpu::broadcast_div<XPUType>);
-  }
-};
-template <typename T>
-class ElementwiseDivGradXPUKernel : public ElemwiseGradKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    ElemwiseGradKernel<T>::Compute(ctx);
-    XPUElementwiseGrad<T, XPUType>(ctx, xpu::broadcast_div_grad<XPUType>, true);
-  }
-};
-}  // namespace operators
-}  // namespace paddle
-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(elementwise_div,
-                       ops::ElementwiseDivXPUKernel<float>,
-                       ops::ElementwiseDivXPUKernel<paddle::platform::float16>);
-REGISTER_OP_XPU_KERNEL(
-    elementwise_div_grad,
-    ops::ElementwiseDivGradXPUKernel<float>,
-    ops::ElementwiseDivGradXPUKernel<paddle::platform::float16>);
-#endif
--- a/paddle/fluid/operators/elementwise/elementwise_mul_op_xpu.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_mul_op_xpu.cc
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/operators/elementwise/elementwise_mul_op.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op.h"
-#include "paddle/fluid/operators/elementwise/elementwise_xpu.h"
-namespace paddle {
-namespace operators {
-template <typename T>
-class ElementwiseMulXPUKernel : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    XPUElementwise<T, XPUType>(ctx, xpu::broadcast_mul<XPUType>);
-  }
-};
-template <typename T>
-class ElementwiseMulGradXPUKernel : public ElemwiseGradKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    ElemwiseGradKernel<T>::Compute(ctx);
-    XPUElementwiseGrad<T, XPUType>(ctx, xpu::broadcast_mul_grad<XPUType>, true);
-  }
-};
-}  // namespace operators
-}  // namespace paddle
-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(elementwise_mul,
-                       ops::ElementwiseMulXPUKernel<float>,
-                       ops::ElementwiseMulXPUKernel<paddle::platform::float16>);
-REGISTER_OP_XPU_KERNEL(
-    elementwise_mul_grad,
-    ops::ElementwiseMulGradXPUKernel<float>,
-    ops::ElementwiseMulGradXPUKernel<paddle::platform::float16>);
-#endif
--- a/paddle/phi/kernels/cpu/elementwise_add_kernel.cc
+++ b/paddle/phi/kernels/cpu/elementwise_add_kernel.cc
@@ -25,21 +25,12 @@ namespace phi {
 // Create the definition of Add
 DEFINE_CPU_ELEMENTWISE_OP(Add)
-template <typename T, typename Context>
-void AddKernel(const Context& dev_ctx,
-               const DenseTensor& x,
-               const DenseTensor& y,
-               DenseTensor* out) {
-  int axis = -1;
-  AddRawKernel<T>(dev_ctx, x, y, axis, out);
-}
 template <typename T, typename Context>
 void GradAddKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& y,
                   DenseTensor* out) {
-  AddKernel<T>(dev_ctx, x, y, out);
+  AddRawKernel<T>(dev_ctx, x, y, -1, out);
 }
 }  // namespace phi
@@ -62,18 +53,6 @@ PD_REGISTER_KERNEL(add_raw,
                   complex64,
                   complex128) {}
-PD_REGISTER_KERNEL(add,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::AddKernel,
-                   float,
-                   double,
-                   int16_t,
-                   int,
-                   int64_t,
-                   complex64,
-                   complex128) {}
 PD_REGISTER_KERNEL(grad_add,
                   CPU,
                   ALL_LAYOUT,

--- a/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc
+++ b/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc
@@ -46,15 +46,6 @@ void DivideRawKernel(const Context& dev_ctx,
  }
 }
-template <typename T, typename Context>
-void DivideKernel(const Context& dev_ctx,
-                  const DenseTensor& x,
-                  const DenseTensor& y,
-                  DenseTensor* out) {
-  int axis = -1;
-  DivideRawKernel<T>(dev_ctx, x, y, axis, out);
-}
 }  // namespace phi
 using complex64 = ::phi::dtype::complex<float>;
@@ -73,13 +64,3 @@ PD_REGISTER_KERNEL(divide_raw,
                   int64_t,
                   complex64,
                   complex128) {}
-PD_REGISTER_KERNEL(divide,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::DivideKernel,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   complex64,
-                   complex128) {}
--- a/paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc
+++ b/paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc
@@ -25,15 +25,6 @@ namespace phi {
 // Create the definition of Multiply
 DEFINE_CPU_ELEMENTWISE_OP(Multiply)
-template <typename T, typename Context>
-void MultiplyKernel(const Context& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    DenseTensor* out) {
-  int axis = -1;
-  MultiplyRawKernel<T>(dev_ctx, x, y, axis, out);
-}
 }  // namespace phi
 using complex64 = ::phi::dtype::complex<float>;
@@ -54,16 +45,3 @@ PD_REGISTER_KERNEL(multiply_raw,
                   complex64,
                   complex128,
                   phi::dtype::bfloat16) {}
-PD_REGISTER_KERNEL(multiply,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::MultiplyKernel,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   bool,
-                   complex64,
-                   complex128,
-                   phi::dtype::bfloat16) {}
--- a/paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc
+++ b/paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc
@@ -25,15 +25,6 @@ namespace phi {
 // Create the definition of Subtract
 DEFINE_CPU_ELEMENTWISE_OP(Subtract)
-template <typename T, typename Context>
-void SubtractKernel(const Context& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    DenseTensor* out) {
-  int axis = -1;
-  SubtractRawKernel<T>(dev_ctx, x, y, axis, out);
-}
 }  // namespace phi
 using complex64 = ::phi::dtype::complex<float>;
@@ -54,15 +45,3 @@ PD_REGISTER_KERNEL(subtract_raw,
                   complex64,
                   complex128,
                   phi::dtype::bfloat16) {}
-PD_REGISTER_KERNEL(subtract,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::SubtractKernel,
-                   float,
-                   double,
-                   int16_t,
-                   int,
-                   int64_t,
-                   complex64,
-                   complex128,
-                   phi::dtype::bfloat16) {}
--- a/paddle/phi/kernels/elementwise_kernel.cc
+++ b/paddle/phi/kernels/elementwise_kernel.cc
@@ -13,6 +13,10 @@
 // limitations under the License.
 #include "paddle/phi/kernels/elementwise_kernel.h"
+#include "paddle/phi/kernels/elementwise_add_kernel.h"
+#include "paddle/phi/kernels/elementwise_divide_kernel.h"
+#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
+#include "paddle/phi/kernels/elementwise_subtract_kernel.h"
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -73,8 +77,40 @@ void ElementwiseHeavisideKernel(const Context& dev_ctx,
  ElementwiseHeavisideRawKernel<T>(dev_ctx, x, y, axis, out);
 }
-}  // namespace phi
+// DivideKernel: axis-free entry point for divide. Forwards dev_ctx, x, y and
+// out unchanged to DivideRawKernel<T, Context>, passing -1 as the axis
+// argument.
+template <typename T, typename Context>
+void DivideKernel(const Context& dev_ctx,
+                  const DenseTensor& x,
+                  const DenseTensor& y,
+                  DenseTensor* out) {
+  DivideRawKernel<T, Context>(dev_ctx, x, y, -1, out);
+}
+// MultiplyKernel: axis-free entry point for multiply. Forwards dev_ctx, x, y
+// and out unchanged to MultiplyRawKernel<T, Context>, passing -1 as the axis
+// argument.
+template <typename T, typename Context>
+void MultiplyKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& y,
+                    DenseTensor* out) {
+  MultiplyRawKernel<T, Context>(dev_ctx, x, y, -1, out);
+}
+// AddKernel: axis-free entry point for add. Forwards dev_ctx, x, y and out
+// unchanged to AddRawKernel<T, Context>, passing -1 as the axis argument.
+template <typename T, typename Context>
+void AddKernel(const Context& dev_ctx,
+               const DenseTensor& x,
+               const DenseTensor& y,
+               DenseTensor* out) {
+  AddRawKernel<T, Context>(dev_ctx, x, y, -1, out);
+}
+// SubtractKernel: axis-free entry point for subtract. Forwards dev_ctx, x, y
+// and out unchanged to SubtractRawKernel<T, Context>, passing -1 as the axis
+// argument. Spelled the same way as the sibling DivideKernel / MultiplyKernel /
+// AddKernel wrappers in this file (explicit Context, literal axis) so the four
+// relocated wrappers stay uniform.
+template <typename T, typename Context>
+void SubtractKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& y,
+                    DenseTensor* out) {
+  SubtractRawKernel<T, Context>(dev_ctx, x, y, -1, out);
+}
+}  // namespace phi
 using complex64 = ::phi::dtype::complex<float>;
 using complex128 = ::phi::dtype::complex<double>;
@@ -123,6 +159,55 @@ PD_REGISTER_KERNEL(elementwise_pow,
                   int,
                   int64_t) {}
+// CPU registrations for the four axis-free arithmetic kernels defined above.
+// Each call names the kernel, the backend (CPU), the layout (ALL_LAYOUT), the
+// kernel function template and the element types it is registered for.
+// complex64 / complex128 are the file-level aliases for
+// ::phi::dtype::complex<float> / ::phi::dtype::complex<double>.
+
+// subtract on CPU.
+PD_REGISTER_KERNEL(subtract,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::SubtractKernel,
+                   float,
+                   double,
+                   int16_t,
+                   int,
+                   int64_t,
+                   complex64,
+                   complex128,
+                   phi::dtype::bfloat16) {}
+// add on CPU (no bfloat16 in this list).
+PD_REGISTER_KERNEL(add,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::AddKernel,
+                   float,
+                   double,
+                   int16_t,
+                   int,
+                   int64_t,
+                   complex64,
+                   complex128) {}
+// multiply on CPU (the only one of the four that lists bool).
+PD_REGISTER_KERNEL(multiply,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::MultiplyKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t,
+                   bool,
+                   complex64,
+                   complex128,
+                   phi::dtype::bfloat16) {}
+// divide on CPU (no int16_t, bool or bfloat16 in this list).
+PD_REGISTER_KERNEL(divide,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::DivideKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t,
+                   complex64,
+                   complex128) {}
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 PD_REGISTER_KERNEL(maximum,
@@ -171,4 +256,83 @@ PD_REGISTER_KERNEL(elementwise_pow,
                   double,
                   int,
                   int64_t) {}
+#endif
+// KPS registrations for subtract / add / multiply / divide.
+// First branch: float-only registrations, compiled when PADDLE_WITH_XPU_KP is
+// defined and PADDLE_WITH_XPU is not.
+// NOTE(review): confirm the build actually produces the
+// "XPU_KP without XPU" macro combination; otherwise this branch is never
+// compiled.
+#if defined(PADDLE_WITH_XPU_KP) && !defined(PADDLE_WITH_XPU)
+PD_REGISTER_KERNEL(subtract, KPS, ALL_LAYOUT, phi::SubtractKernel, float) {}
+PD_REGISTER_KERNEL(add, KPS, ALL_LAYOUT, phi::AddKernel, float) {}
+PD_REGISTER_KERNEL(multiply, KPS, ALL_LAYOUT, phi::MultiplyKernel, float) {}
+PD_REGISTER_KERNEL(divide, KPS, ALL_LAYOUT, phi::DivideKernel, float) {}
+// Second branch: CUDA / HIP builds, with the full per-kernel type lists.
+#elif defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+// subtract on KPS.
+PD_REGISTER_KERNEL(subtract,
+                   KPS,
+                   ALL_LAYOUT,
+                   phi::SubtractKernel,
+                   float,
+                   double,
+                   int16_t,
+                   int,
+                   int64_t,
+                   phi::dtype::float16,
+                   complex64,
+                   complex128,
+                   phi::dtype::bfloat16) {}
+// add on KPS.
+PD_REGISTER_KERNEL(add,
+                   KPS,
+                   ALL_LAYOUT,
+                   phi::AddKernel,
+                   float,
+                   double,
+                   int16_t,
+                   int,
+                   int64_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   complex64,
+                   complex128) {}
+// multiply on KPS (lists bool in addition to the numeric types).
+PD_REGISTER_KERNEL(multiply,
+                   KPS,
+                   ALL_LAYOUT,
+                   phi::MultiplyKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t,
+                   bool,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   complex64,
+                   complex128) {}
+// divide on KPS (no int16_t or bool in this list).
+PD_REGISTER_KERNEL(divide,
+                   KPS,
+                   ALL_LAYOUT,
+                   phi::DivideKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   complex64,
+                   complex128) {}
+#endif
+// XPU registrations for the axis-free divide / add / multiply kernels, compiled
+// when PADDLE_WITH_XPU is defined and PADDLE_WITH_XPU_KP is not. Each is
+// registered for phi::dtype::float16 and float.
+// NOTE(review): subtract has no XPU registration in this group -- presumably it
+// is still registered elsewhere; confirm.
+#if defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP)
+PD_REGISTER_KERNEL(
+    divide, XPU, ALL_LAYOUT, phi::DivideKernel, phi::dtype::float16, float) {}
+PD_REGISTER_KERNEL(
+    add, XPU, ALL_LAYOUT, phi::AddKernel, phi::dtype::float16, float) {}
+PD_REGISTER_KERNEL(multiply,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::MultiplyKernel,
+                   phi::dtype::float16,
+                   float) {}
 #endif
--- a/paddle/phi/kernels/kps/elementwise_add_kernel.cu
+++ b/paddle/phi/kernels/kps/elementwise_add_kernel.cu
@@ -24,27 +24,17 @@ namespace phi {
 DEFINE_CUDA_ELEMENTWISE_OP(Add)
-template <typename T, typename Context>
-void AddKernel(const Context& dev_ctx,
-               const DenseTensor& x,
-               const DenseTensor& y,
-               DenseTensor* out) {
-  int axis = -1;
-  AddRawKernel<T>(dev_ctx, x, y, axis, out);
-}
 template <typename T, typename Context>
 void GradAddKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& y,
                   DenseTensor* out) {
-  AddKernel<T>(dev_ctx, x, y, out);
+  AddRawKernel<T>(dev_ctx, x, y, -1, out);
 }
 }  // namespace phi
 #ifdef PADDLE_WITH_XPU_KP
-PD_REGISTER_KERNEL(add, KPS, ALL_LAYOUT, phi::AddKernel, float) {}
 PD_REGISTER_KERNEL(add_raw, KPS, ALL_LAYOUT, phi::AddRawKernel, float) {}
 #else
@@ -66,19 +56,6 @@ PD_REGISTER_KERNEL(add_raw,
                   bfloat16,
                   complex64,
                   complex128) {}
-PD_REGISTER_KERNEL(add,
-                   KPS,
-                   ALL_LAYOUT,
-                   phi::AddKernel,
-                   float,
-                   double,
-                   int16_t,
-                   int,
-                   int64_t,
-                   phi::dtype::float16,
-                   phi::dtype::bfloat16,
-                   complex64,
-                   complex128) {}
 PD_REGISTER_KERNEL(grad_add,
                   KPS,

--- a/paddle/phi/kernels/kps/elementwise_divide_kernel.cu
+++ b/paddle/phi/kernels/kps/elementwise_divide_kernel.cu
@@ -25,19 +25,9 @@ namespace phi {
 // Create the definition of Divide
 DEFINE_CUDA_ELEMENTWISE_OP(Divide)
-template <typename T, typename Context>
-void DivideKernel(const Context& dev_ctx,
-                  const DenseTensor& x,
-                  const DenseTensor& y,
-                  DenseTensor* out) {
-  int axis = -1;
-  DivideRawKernel<T>(dev_ctx, x, y, axis, out);
-}
 }  // namespace phi
 #ifdef PADDLE_WITH_XPU_KP
-PD_REGISTER_KERNEL(divide, KPS, ALL_LAYOUT, phi::DivideKernel, float) {}
 PD_REGISTER_KERNEL(divide_raw, KPS, ALL_LAYOUT, phi::DivideRawKernel, float) {}
 #else
@@ -59,16 +49,4 @@ PD_REGISTER_KERNEL(divide_raw,
                   complex64,
                   complex128) {}
-PD_REGISTER_KERNEL(divide,
-                   KPS,
-                   ALL_LAYOUT,
-                   phi::DivideKernel,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   phi::dtype::float16,
-                   phi::dtype::bfloat16,
-                   complex64,
-                   complex128) {}
 #endif
--- a/paddle/phi/kernels/kps/elementwise_multiply_kernel.cu
+++ b/paddle/phi/kernels/kps/elementwise_multiply_kernel.cu
@@ -25,19 +25,9 @@ namespace phi {
 // Create the definition of Multiply
 DEFINE_CUDA_ELEMENTWISE_OP(Multiply)
-template <typename T, typename Context>
-void MultiplyKernel(const Context& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    DenseTensor* out) {
-  int axis = -1;
-  MultiplyRawKernel<T>(dev_ctx, x, y, axis, out);
-}
 }  // namespace phi
 #ifdef PADDLE_WITH_XPU_KP
-PD_REGISTER_KERNEL(multiply, KPS, ALL_LAYOUT, phi::MultiplyKernel, float) {}
 PD_REGISTER_KERNEL(
    multiply_raw, KPS, ALL_LAYOUT, phi::MultiplyRawKernel, float) {}
 #else
@@ -60,17 +50,5 @@ PD_REGISTER_KERNEL(multiply_raw,
                   complex64,
                   complex128,
                   bfloat16) {}
-PD_REGISTER_KERNEL(multiply,
-                   KPS,
-                   ALL_LAYOUT,
-                   phi::MultiplyKernel,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   bool,
-                   phi::dtype::float16,
-                   phi::dtype::bfloat16,
-                   complex64,
-                   complex128) {}
 #endif
--- a/paddle/phi/kernels/kps/elementwise_subtract_kernel.cu
+++ b/paddle/phi/kernels/kps/elementwise_subtract_kernel.cu
@@ -25,19 +25,9 @@ namespace phi {
 // Create the definition of Subtract
 DEFINE_CUDA_ELEMENTWISE_OP(Subtract)
-template <typename T, typename Context>
-void SubtractKernel(const Context& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    DenseTensor* out) {
-  int axis = -1;
-  SubtractRawKernel<T>(dev_ctx, x, y, axis, out);
-}
 }  // namespace phi
 #ifdef PADDLE_WITH_XPU_KP
-PD_REGISTER_KERNEL(subtract, KPS, ALL_LAYOUT, phi::SubtractKernel, float) {}
 PD_REGISTER_KERNEL(
    subtract_raw, KPS, ALL_LAYOUT, phi::SubtractRawKernel, float) {}
 #else
@@ -60,17 +50,5 @@ PD_REGISTER_KERNEL(subtract_raw,
                   bfloat16,
                   complex64,
                   complex128) {}
-PD_REGISTER_KERNEL(subtract,
-                   KPS,
-                   ALL_LAYOUT,
-                   phi::SubtractKernel,
-                   float,
-                   double,
-                   int16_t,
-                   int,
-                   int64_t,
-                   phi::dtype::float16,
-                   complex64,
-                   complex128,
-                   phi::dtype::bfloat16) {}
 #endif
--- a/paddle/phi/kernels/xpu/elementwise.h
+++ b/paddle/phi/kernels/xpu/elementwise.h
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@ limitations under the License. */
 #include <utility>
 #include <vector>
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/dense_tensor.h"
@@ -88,13 +89,7 @@ void XPUElementwise(const XPUContext& dev_ctx,
             reinterpret_cast<XPUType*>(z_data),
             x_dims_vec,
             y_dims_vec);
-  PADDLE_ENFORCE_EQ(
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "elementwise");
-      ret,
-      xpu::SUCCESS,
-      errors::External(
-          "XPU kernel Elementwise occur error in XPUElementwise error code ",
-          ret,
-          XPUAPIErrorMsg[ret]));
 }
 template <typename T, typename XPUType>
@@ -177,13 +172,7 @@ void XPUElementwiseGrad(const XPUContext& dev_ctx,
                 reinterpret_cast<XPUType*>(dx_data),
                 x_dims_vec,
                 y_dims_vec);
-  PADDLE_ENFORCE_EQ(
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "elementwise");
-      ret,
-      xpu::SUCCESS,
-      errors::External(
-          "XPU kernel Elementwise occur error in XPUElementwise error code ",
-          ret,
-          XPUAPIErrorMsg[ret]));
 }
 }  // namespace phi

--- a/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc
@@ -12,11 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "paddle/phi/kernels/elementwise_add_grad_kernel.h"
 #include <memory>
 #include <string>
-#include "paddle/phi/kernels/elementwise_add_grad_kernel.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
 #include "paddle/phi/backends/xpu/xpu_header.h"

--- a/paddle/phi/kernels/xpu/elementwise_add_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_add_kernel.cc
@@ -12,11 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "paddle/phi/kernels/elementwise_add_kernel.h"
 #include <memory>
 #include <string>
-#include "paddle/phi/kernels/elementwise_add_kernel.h"
 #include "paddle/phi/api/ext/dispatch.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
@@ -58,14 +58,6 @@ void AddRawKernel(const Context& dev_ctx,
      dev_ctx, x, y, axis, out, xpu::broadcast_add<XPUType>);
 }
-template <typename T, typename Context>
-void AddKernel(const Context& dev_ctx,
-               const DenseTensor& x,
-               const DenseTensor& y,
-               DenseTensor* out) {
-  AddRawKernel<T, Context>(dev_ctx, x, y, -1, out);
-}
 }  // namespace phi
 PD_REGISTER_KERNEL(grad_add,
@@ -76,6 +68,3 @@ PD_REGISTER_KERNEL(grad_add,
                   float) {}
 PD_REGISTER_KERNEL(
    add_raw, XPU, ALL_LAYOUT, phi::AddRawKernel, phi::dtype::float16, float) {}
-PD_REGISTER_KERNEL(
-    add, XPU, ALL_LAYOUT, phi::AddKernel, phi::dtype::float16, float) {}
--- a/paddle/phi/kernels/xpu/elementwise_divide_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_divide_grad_kernel.cc
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/elementwise_divide_grad_kernel.h"
+#include <memory>
+#include <string>
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/elementwise_base.h"
+#include "paddle/phi/kernels/xpu/elementwise.h"
+namespace phi {
+// DivideGradKernel: XPU kernel registered below as divide_grad.
+//
+// Calls funcs::ElementwiseGradPreProcess(dout, dx), then passes x, y, dout,
+// axis, dx and dy to XPUElementwiseGrad<T, XPUType> together with
+// xpu::broadcast_div_grad<XPUType>, where XPUType is XPUTypeTrait<T>::Type.
+// The `out` parameter is part of the signature but is not referenced in this
+// body.
+// NOTE(review): the trailing `true` is a flag of XPUElementwiseGrad whose
+// meaning is not visible here -- confirm against xpu/elementwise.h.
+template <typename T, typename Context>
+void DivideGradKernel(const Context& dev_ctx,
+                      const DenseTensor& x,
+                      const DenseTensor& y,
+                      const DenseTensor& out,
+                      const DenseTensor& dout,
+                      int axis,
+                      DenseTensor* dx,
+                      DenseTensor* dy) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  funcs::ElementwiseGradPreProcess(dout, dx);
+  XPUElementwiseGrad<T, XPUType>(dev_ctx,
+                                 x,
+                                 y,
+                                 dout,
+                                 axis,
+                                 dx,
+                                 dy,
+                                 xpu::broadcast_div_grad<XPUType>,
+                                 true);
+}
+}  // namespace phi
+// Registers phi::DivideGradKernel as divide_grad for the XPU backend,
+// ALL_LAYOUT, for phi::dtype::float16 and float.
+PD_REGISTER_KERNEL(divide_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::DivideGradKernel,
+                   phi::dtype::float16,
+                   float) {}
--- a/paddle/phi/kernels/xpu/elementwise_divide_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_divide_kernel.cc
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/elementwise_divide_kernel.h"
+#include <memory>
+#include <string>
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/elementwise_base.h"
+#include "paddle/phi/kernels/xpu/elementwise.h"
+namespace phi {
+// DivideRawKernel: XPU kernel registered below as divide_raw.
+//
+// Passes x, y, the caller-supplied axis and out to XPUElementwise<T, XPUType>
+// together with xpu::broadcast_div<XPUType>, where XPUType is
+// XPUTypeTrait<T>::Type.
+template <typename T, typename Context>
+void DivideRawKernel(const Context& dev_ctx,
+                     const DenseTensor& x,
+                     const DenseTensor& y,
+                     int axis,
+                     DenseTensor* out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  XPUElementwise<T, XPUType>(
+      dev_ctx, x, y, axis, out, xpu::broadcast_div<XPUType>);
+}
+}  // namespace phi
+// Registers phi::DivideRawKernel as divide_raw for the XPU backend,
+// ALL_LAYOUT, for phi::dtype::float16 and float.
+PD_REGISTER_KERNEL(divide_raw,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::DivideRawKernel,
+                   phi::dtype::float16,
+                   float) {}
--- a/paddle/phi/kernels/xpu/elementwise_multiply_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_multiply_grad_kernel.cc
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/elementwise_multiply_grad_kernel.h"
+#include <memory>
+#include <string>
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/elementwise_base.h"
+#include "paddle/phi/kernels/xpu/elementwise.h"
+namespace phi {
+// MultiplyGradKernel: XPU kernel registered below as multiply_grad.
+//
+// Calls funcs::ElementwiseGradPreProcess(dout, dx), then passes x, y, dout,
+// axis, dx and dy to XPUElementwiseGrad<T, XPUType> together with
+// xpu::broadcast_mul_grad<XPUType>, where XPUType is XPUTypeTrait<T>::Type.
+// NOTE(review): the trailing `true` is a flag of XPUElementwiseGrad whose
+// meaning is not visible here -- confirm against xpu/elementwise.h.
+template <typename T, typename Context>
+void MultiplyGradKernel(const Context& dev_ctx,
+                        const DenseTensor& x,
+                        const DenseTensor& y,
+                        const DenseTensor& dout,
+                        int axis,
+                        DenseTensor* dx,
+                        DenseTensor* dy) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  funcs::ElementwiseGradPreProcess(dout, dx);
+  XPUElementwiseGrad<T, XPUType>(dev_ctx,
+                                 x,
+                                 y,
+                                 dout,
+                                 axis,
+                                 dx,
+                                 dy,
+                                 xpu::broadcast_mul_grad<XPUType>,
+                                 true);
+}
+}  // namespace phi
+// Registers phi::MultiplyGradKernel as multiply_grad for the XPU backend,
+// ALL_LAYOUT, for phi::dtype::float16 and float.
+PD_REGISTER_KERNEL(multiply_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::MultiplyGradKernel,
+                   phi::dtype::float16,
+                   float) {}
--- a/paddle/phi/kernels/xpu/elementwise_multiply_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_multiply_kernel.cc
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
+#include <memory>
+#include <string>
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/elementwise_base.h"
+#include "paddle/phi/kernels/xpu/elementwise.h"
+namespace phi {
+// MultiplyRawKernel: XPU kernel registered below as multiply_raw.
+//
+// Passes x, y, the caller-supplied axis and out to XPUElementwise<T, XPUType>
+// together with xpu::broadcast_mul<XPUType>, where XPUType is
+// XPUTypeTrait<T>::Type.
+template <typename T, typename Context>
+void MultiplyRawKernel(const Context& dev_ctx,
+                       const DenseTensor& x,
+                       const DenseTensor& y,
+                       int axis,
+                       DenseTensor* out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  XPUElementwise<T, XPUType>(
+      dev_ctx, x, y, axis, out, xpu::broadcast_mul<XPUType>);
+}
+}  // namespace phi
+// Registers phi::MultiplyRawKernel as multiply_raw for the XPU backend,
+// ALL_LAYOUT, for phi::dtype::float16 and float.
+PD_REGISTER_KERNEL(multiply_raw,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::MultiplyRawKernel,
+                   phi::dtype::float16,
+                   float) {}