From be874c08c76a9880dd27cbc216d9102e87199e11 Mon Sep 17 00:00:00 2001
From: YuanRisheng <yuanrisheng@baidu.com>
Date: Thu, 16 Dec 2021 10:32:50 +0800
Subject: [PATCH] [Pten]Modify registered kernel name (#38109)

* Reduce reshape kernel functions in pten

* delete notes

* fix bugs found when compiling

* modify registered kernel names

* fix compile bugs
---
 .../elementwise/elementwise_add_op.h          |  3 +-
 .../elementwise/elementwise_div_op.h          |  3 +-
 .../elementwise/elementwise_mul_op.cu         |  3 +-
 .../elementwise/elementwise_mul_op.h          |  3 +-
 .../elementwise/elementwise_sub_op.h          |  3 +-
 paddle/fluid/operators/fill_any_like_op.h     |  2 +-
 paddle/pten/include/creation.h                |  4 +-
 paddle/pten/include/math.h                    |  8 +--
 paddle/pten/kernels/cpu/creation.cc           | 16 +++---
 paddle/pten/kernels/cpu/creation.h            | 12 ++---
 paddle/pten/kernels/cpu/math.cc               | 38 +++++++-------
 paddle/pten/kernels/cpu/math.h                | 50 +++++++++----------
 paddle/pten/kernels/cuda/creation.cu          | 16 +++---
 paddle/pten/kernels/cuda/creation.h           | 12 ++---
 paddle/pten/kernels/cuda/math.cu              | 22 ++++----
 paddle/pten/kernels/cuda/math.h               | 50 +++++++++----------
 .../hybird/general/elementwise_functor.h      | 38 +++++++-------
 .../pten/tests/kernels/test_fill_dev_api.cc   |  2 +-
 18 files changed, 137 insertions(+), 148 deletions(-)
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h
index f0292ffe178..a4567beeb4f 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h
@@ -68,8 +68,7 @@ class ElementwiseAddKernel : public framework::OpKernel<T> {
     auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
     auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
     auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
-    pten::ElementwiseAdd<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis,
-                            pt_z.get());
+    pten::Add<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
   }
 };
 
diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.h b/paddle/fluid/operators/elementwise/elementwise_div_op.h
index 374dda9e83d..f3ba5050c4f 100644
--- a/paddle/fluid/operators/elementwise/elementwise_div_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_div_op.h
@@ -62,8 +62,7 @@ class ElementwiseDivKernel : public framework::OpKernel<T> {
     auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
     auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
     auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
-    pten::ElementwiseDiv<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis,
-                            pt_z.get());
+    pten::Divide<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
   }
 };
 
diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu
index 243c945d3c4..e131bc49746 100644
--- a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu
@@ -57,8 +57,7 @@ class ElementwiseMulKernel<platform::CUDADeviceContext, T>
       auto pt_x = paddle::experimental::MakePtenDenseTensor(*x_lod);
       auto pt_y = paddle::experimental::MakePtenDenseTensor(*y_lod);
       auto pt_z = paddle::experimental::MakePtenDenseTensor(*z_lod);
-      pten::ElementwiseMul<T>(cuda_ctx, *pt_x.get(), *pt_y.get(), axis,
-                              pt_z.get());
+      pten::Multiply<T>(cuda_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
     } else {
       PADDLE_THROW(platform::errors::InvalidArgument(
           "X's type[%s] is not supported by elementwise_op. X's type should be "
diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h
index ba279e90a34..8b43f82e6b6 100644
--- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h
@@ -129,8 +129,7 @@ class ElementwiseMulKernel : public framework::OpKernel<T> {
       auto pt_x = paddle::experimental::MakePtenDenseTensor(*x_lod);
       auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
       auto pt_z = paddle::experimental::MakePtenDenseTensor(*z_lod);
-      pten::ElementwiseMul<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis,
-                              pt_z.get());
+      pten::Multiply<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
     } else {
       PADDLE_THROW(platform::errors::InvalidArgument(
           "X's type[%s] is not supported by elementwise_op. X's type should be "
diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.h b/paddle/fluid/operators/elementwise/elementwise_sub_op.h
index 08a4e709a37..09a33788d41 100644
--- a/paddle/fluid/operators/elementwise/elementwise_sub_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.h
@@ -56,8 +56,7 @@ class ElementwiseSubKernel : public framework::OpKernel<T> {
     auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
     auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
     auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
-    pten::ElementwiseSub<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis,
-                            pt_z.get());
+    pten::Subtract<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
   }
 };
 
diff --git a/paddle/fluid/operators/fill_any_like_op.h b/paddle/fluid/operators/fill_any_like_op.h
index 2bc509709c5..3ad56827f83 100644
--- a/paddle/fluid/operators/fill_any_like_op.h
+++ b/paddle/fluid/operators/fill_any_like_op.h
@@ -65,7 +65,7 @@ class FillAnyLikeKernel : public framework::OpKernel<T> {
 
     const auto& dev_ctx = context.template device_context<DeviceContext>();
     // call new kernel
-    pten::FillAnyLike<T>(dev_ctx, value, pt_out.get());
+    pten::FullLike<T>(dev_ctx, value, pt_out.get());
   }
 };
 
diff --git a/paddle/pten/include/creation.h b/paddle/pten/include/creation.h
index 7341ea18917..69c83c74712 100644
--- a/paddle/pten/include/creation.h
+++ b/paddle/pten/include/creation.h
@@ -24,7 +24,7 @@ namespace pten {
 // TODO(YuanRisheng) This function name should be same as User API name.
 // TODO(zyfncg) Automatic code generation
 template <typename T, typename ContextT>
-DenseTensor FillAnyLike(
+DenseTensor FullLike(
     const ContextT& dev_ctx,
     const DenseTensor& x,
     const Scalar& val,
@@ -36,7 +36,7 @@ DenseTensor FillAnyLike(
       pten::make_intrusive<paddle::experimental::SharedStorage>(
           dev_ctx.GetPlace()),
       std::move(out_meta));
-  FillAnyLike<T>(dev_ctx, val, &dense_out);
+  FullLike<T>(dev_ctx, val, &dense_out);
   return dense_out;
 }
 
diff --git a/paddle/pten/include/math.h b/paddle/pten/include/math.h
index 1afad968ef5..01dd0a7fda7 100644
--- a/paddle/pten/include/math.h
+++ b/paddle/pten/include/math.h
@@ -100,7 +100,7 @@ DenseTensor Add(const ContextT& dev_ctx,
       pten::make_intrusive<paddle::experimental::SharedStorage>(
           dev_ctx.GetPlace()),
       std::move(out_meta));
-  ElementwiseAdd<T>(dev_ctx, x, y, axis, &dense_out);
+  Add<T>(dev_ctx, x, y, axis, &dense_out);
   return dense_out;
 }
 
@@ -114,7 +114,7 @@ DenseTensor Subtract(const ContextT& dev_ctx,
       pten::make_intrusive<paddle::experimental::SharedStorage>(
           dev_ctx.GetPlace()),
       std::move(out_meta));
-  ElementwiseSub<T>(dev_ctx, x, y, axis, &dense_out);
+  Subtract<T>(dev_ctx, x, y, axis, &dense_out);
   return dense_out;
 }
 
@@ -128,7 +128,7 @@ DenseTensor Divide(const ContextT& dev_ctx,
       pten::make_intrusive<paddle::experimental::SharedStorage>(
           dev_ctx.GetPlace()),
       std::move(out_meta));
-  ElementwiseDiv<T>(dev_ctx, x, y, axis, &dense_out);
+  Divide<T>(dev_ctx, x, y, axis, &dense_out);
   return dense_out;
 }
 
@@ -142,7 +142,7 @@ DenseTensor Multiply(const ContextT& dev_ctx,
       pten::make_intrusive<paddle::experimental::SharedStorage>(
           dev_ctx.GetPlace()),
       std::move(out_meta));
-  ElementwiseMul<T>(dev_ctx, x, y, axis, &dense_out);
+  Multiply<T>(dev_ctx, x, y, axis, &dense_out);
   return dense_out;
 }
 }  // namespace pten
diff --git a/paddle/pten/kernels/cpu/creation.cc b/paddle/pten/kernels/cpu/creation.cc
index f21c322e2db..2bb063999f4 100644
--- a/paddle/pten/kernels/cpu/creation.cc
+++ b/paddle/pten/kernels/cpu/creation.cc
@@ -20,9 +20,7 @@
 namespace pten {
 
 template <typename T>
-void FillAnyLike(const CPUContext& dev_ctx,
-                 const Scalar& val,
-                 DenseTensor* out) {
+void FullLike(const CPUContext& dev_ctx, const Scalar& val, DenseTensor* out) {
   auto value = val.to<float>();
   using CommonType = typename std::common_type<
       float,
@@ -51,10 +49,10 @@ void FillAnyLike(const CPUContext& dev_ctx,
 }
 
 template <typename T>
-void FillConstant(const CPUContext& dev_ctx,
-                  const ScalarArray& shape,
-                  const Scalar& val,
-                  DenseTensor* out) {
+void Full(const CPUContext& dev_ctx,
+          const ScalarArray& shape,
+          const Scalar& val,
+          DenseTensor* out) {
   out->Resize(paddle::framework::make_ddim(shape.GetData()));
   eigen::fill<CPUContext, T>(dev_ctx, out, val.to<T>());
 }
@@ -64,7 +62,7 @@ void FillConstant(const CPUContext& dev_ctx,
 PT_REGISTER_KERNEL(full_like,
                    CPU,
                    ALL_LAYOUT,
-                   pten::FillAnyLike,
+                   pten::FullLike,
                    float,
                    double,
                    int,
@@ -75,7 +73,7 @@ PT_REGISTER_KERNEL(full_like,
 PT_REGISTER_KERNEL(full,
                    CPU,
                    ALL_LAYOUT,
-                   pten::FillConstant,
+                   pten::Full,
                    float,
                    double,
                    uint8_t,
diff --git a/paddle/pten/kernels/cpu/creation.h b/paddle/pten/kernels/cpu/creation.h
index 33e0107f1ac..9d53f78026a 100644
--- a/paddle/pten/kernels/cpu/creation.h
+++ b/paddle/pten/kernels/cpu/creation.h
@@ -25,14 +25,12 @@ namespace pten {
 using CPUContext = paddle::platform::CPUDeviceContext;
 
 template <typename T>
-void FillAnyLike(const CPUContext& dev_ctx,
-                 const Scalar& val,
-                 DenseTensor* out);
+void FullLike(const CPUContext& dev_ctx, const Scalar& val, DenseTensor* out);
 
 template <typename T>
-void FillConstant(const CPUContext& dev_ctx,
-                  const ScalarArray& shape,
-                  const Scalar& val,
-                  DenseTensor* out);
+void Full(const CPUContext& dev_ctx,
+          const ScalarArray& shape,
+          const Scalar& val,
+          DenseTensor* out);
 
 }  // namespace pten
diff --git a/paddle/pten/kernels/cpu/math.cc b/paddle/pten/kernels/cpu/math.cc
index 616058d5ace..67493015c61 100644
--- a/paddle/pten/kernels/cpu/math.cc
+++ b/paddle/pten/kernels/cpu/math.cc
@@ -59,25 +59,25 @@ void Scale(const CPUContext& dev_ctx,
 }
 
 template <typename T>
-void ElementwiseDiv(const CPUContext& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    int axis,
-                    DenseTensor* out) {
+void Divide(const CPUContext& dev_ctx,
+            const DenseTensor& x,
+            const DenseTensor& y,
+            int axis,
+            DenseTensor* out) {
   // allocate memory for out
   out->mutable_data<T>();
   if (x.dims() == y.dims() && std::is_floating_point<T>::value) {
-    SameDimsElementwiseCompute<general::SameDimsDivFunctor<CPUContext, T>>()(
+    SameDimsElementwiseCompute<general::SameDimsDivideFunctor<CPUContext, T>>()(
         dev_ctx, x, y, out);
   } else {
     auto x_dims = x.dims();
     auto y_dims = y.dims();
     if (x_dims.size() >= y_dims.size()) {
-      ElementwiseCompute<general::DivFunctor<T>, T>(
-          dev_ctx, x, y, axis, general::DivFunctor<T>(), out);
+      ElementwiseCompute<general::DivideFunctor<T>, T>(
+          dev_ctx, x, y, axis, general::DivideFunctor<T>(), out);
     } else {
-      ElementwiseCompute<general::InverseDivFunctor<T>, T>(
-          dev_ctx, x, y, axis, general::InverseDivFunctor<T>(), out);
+      ElementwiseCompute<general::InverseDivideFunctor<T>, T>(
+          dev_ctx, x, y, axis, general::InverseDivideFunctor<T>(), out);
     }
   }
 }
@@ -95,14 +95,14 @@ void Sum(const CPUContext& dev_ctx,
       dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out);
 }
 
-// Create the definition of ElementwiseAdd
+// Create the definition of Add
 DEFINE_CPU_ELEMENTWISE_OP(Add)
 
-// Create the definition of ElementwiseSub
-DEFINE_CPU_ELEMENTWISE_OP(Sub)
+// Create the definition of Subtract
+DEFINE_CPU_ELEMENTWISE_OP(Subtract)
 
-// Create the definition of ElementwiseMul
-DEFINE_CPU_ELEMENTWISE_OP(Mul)
+// Create the definition of Multiply
+DEFINE_CPU_ELEMENTWISE_OP(Multiply)
 
 }  // namespace pten
 
@@ -128,7 +128,7 @@ PT_REGISTER_KERNEL(scale,
 PT_REGISTER_KERNEL(add,
                    CPU,
                    ALL_LAYOUT,
-                   pten::ElementwiseAdd,
+                   pten::Add,
                    float,
                    double,
                    int,
@@ -138,7 +138,7 @@ PT_REGISTER_KERNEL(add,
 PT_REGISTER_KERNEL(subtract,
                    CPU,
                    ALL_LAYOUT,
-                   pten::ElementwiseSub,
+                   pten::Subtract,
                    float,
                    double,
                    int,
@@ -148,7 +148,7 @@ PT_REGISTER_KERNEL(subtract,
 PT_REGISTER_KERNEL(divide,
                    CPU,
                    ALL_LAYOUT,
-                   pten::ElementwiseDiv,
+                   pten::Divide,
                    float,
                    double,
                    int,
@@ -158,7 +158,7 @@ PT_REGISTER_KERNEL(divide,
 PT_REGISTER_KERNEL(multiply,
                    CPU,
                    ALL_LAYOUT,
-                   pten::ElementwiseMul,
+                   pten::Multiply,
                    float,
                    double,
                    int,
diff --git a/paddle/pten/kernels/cpu/math.h b/paddle/pten/kernels/cpu/math.h
index 31532f38f6e..5ee0f9f8956 100644
--- a/paddle/pten/kernels/cpu/math.h
+++ b/paddle/pten/kernels/cpu/math.h
@@ -47,32 +47,32 @@ void Scale(const CPUContext& dev_ctx,
            DenseTensor* out);
 
 template <typename T>
-void ElementwiseAdd(const CPUContext& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    int axis,
-                    DenseTensor* out);
+void Add(const CPUContext& dev_ctx,
+         const DenseTensor& x,
+         const DenseTensor& y,
+         int axis,
+         DenseTensor* out);
 
 template <typename T>
-void ElementwiseSub(const CPUContext& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    int axis,
-                    DenseTensor* out);
+void Subtract(const CPUContext& dev_ctx,
+              const DenseTensor& x,
+              const DenseTensor& y,
+              int axis,
+              DenseTensor* out);
 
 template <typename T>
-void ElementwiseDiv(const CPUContext& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    int axis,
-                    DenseTensor* out);
+void Divide(const CPUContext& dev_ctx,
+            const DenseTensor& x,
+            const DenseTensor& y,
+            int axis,
+            DenseTensor* out);
 
 template <typename T>
-void ElementwiseMul(const CPUContext& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    int axis,
-                    DenseTensor* out);
+void Multiply(const CPUContext& dev_ctx,
+              const DenseTensor& x,
+              const DenseTensor& y,
+              int axis,
+              DenseTensor* out);
 template <typename T>
 void Sum(const CPUContext& dev_ctx,
          const DenseTensor& x,
@@ -87,11 +87,11 @@ void Sum(const CPUContext& dev_ctx,
 
 #define DEFINE_CPU_ELEMENTWISE_OP(name)                                      \
   template <typename T>                                                      \
-  void Elementwise##name(const CPUContext& dev_ctx,                          \
-                         const DenseTensor& x,                               \
-                         const DenseTensor& y,                               \
-                         int axis,                                           \
-                         DenseTensor* out) {                                 \
+  void name(const CPUContext& dev_ctx,                                       \
+            const DenseTensor& x,                                            \
+            const DenseTensor& y,                                            \
+            int axis,                                                        \
+            DenseTensor* out) {                                              \
     out->mutable_data<T>();                                                  \
     if (x.dims() == y.dims()) {                                              \
       SameDimsElementwiseCompute<                                            \
diff --git a/paddle/pten/kernels/cuda/creation.cu b/paddle/pten/kernels/cuda/creation.cu
index 95a561d0c94..444c52e87f5 100644
--- a/paddle/pten/kernels/cuda/creation.cu
+++ b/paddle/pten/kernels/cuda/creation.cu
@@ -20,9 +20,7 @@
 namespace pten {
 
 template <typename T>
-void FillAnyLike(const CUDAContext& dev_ctx,
-                 const Scalar& val,
-                 DenseTensor* out) {
+void FullLike(const CUDAContext& dev_ctx, const Scalar& val, DenseTensor* out) {
   auto value = val.to<float>();
   using CommonType = typename std::common_type<
       float,
@@ -52,10 +50,10 @@ void FillAnyLike(const CUDAContext& dev_ctx,
 }
 
 template <typename T>
-void FillConstant(const CUDAContext& dev_ctx,
-                  const ScalarArray& shape,
-                  const Scalar& val,
-                  DenseTensor* out) {
+void Full(const CUDAContext& dev_ctx,
+          const ScalarArray& shape,
+          const Scalar& val,
+          DenseTensor* out) {
   out->Resize(paddle::framework::make_ddim(shape.GetData()));
   eigen::fill<CUDAContext, T>(dev_ctx, out, val.to<T>());
 }
@@ -65,7 +63,7 @@ void FillConstant(const CUDAContext& dev_ctx,
 PT_REGISTER_KERNEL(full_like,
                    CUDA,
                    ALL_LAYOUT,
-                   pten::FillAnyLike,
+                   pten::FullLike,
                    float,
                    double,
                    int,
@@ -76,7 +74,7 @@ PT_REGISTER_KERNEL(full_like,
 PT_REGISTER_KERNEL(full,
                    CUDA,
                    ALL_LAYOUT,
-                   pten::FillConstant,
+                   pten::Full,
                    float,
                    double,
                    uint8_t,
diff --git a/paddle/pten/kernels/cuda/creation.h b/paddle/pten/kernels/cuda/creation.h
index 4943f720761..a8fa7dc84d4 100644
--- a/paddle/pten/kernels/cuda/creation.h
+++ b/paddle/pten/kernels/cuda/creation.h
@@ -28,15 +28,13 @@ namespace pten {
 using CUDAContext = paddle::platform::CUDADeviceContext;
 
 template <typename T>
-void FillAnyLike(const CUDAContext& dev_ctx,
-                 const Scalar& val,
-                 DenseTensor* out);
+void FullLike(const CUDAContext& dev_ctx, const Scalar& val, DenseTensor* out);
 
 template <typename T>
-void FillConstant(const CUDAContext& dev_ctx,
-                  const ScalarArray& shape,
-                  const Scalar& val,
-                  DenseTensor* out);
+void Full(const CUDAContext& dev_ctx,
+          const ScalarArray& shape,
+          const Scalar& val,
+          DenseTensor* out);
 
 }  // namespace pten
 
diff --git a/paddle/pten/kernels/cuda/math.cu b/paddle/pten/kernels/cuda/math.cu
index b4a60340e00..e0974181dc8 100644
--- a/paddle/pten/kernels/cuda/math.cu
+++ b/paddle/pten/kernels/cuda/math.cu
@@ -87,14 +87,14 @@ void Scale(const CUDAContext& dev_ctx,
       dev_ctx, x, scale.to<float>(), bias, bias_after_scale, out);
 }
 
-// Create the definition of ElementwiseAdd
+// Create the definition of Add
 DEFINE_CUDA_ELEMENTWISE_OP(Add)
-// Create the definition of ElementwiseSub
-DEFINE_CUDA_ELEMENTWISE_OP(Sub)
-// Create the definition of ElementwiseMul
-DEFINE_CUDA_ELEMENTWISE_OP(Mul)
-// Create the definition of ElementwiseDiv
-DEFINE_CUDA_ELEMENTWISE_OP(Div)
+// Create the definition of Subtract
+DEFINE_CUDA_ELEMENTWISE_OP(Subtract)
+// Create the definition of Multiply
+DEFINE_CUDA_ELEMENTWISE_OP(Multiply)
+// Create the definition of Divide
+DEFINE_CUDA_ELEMENTWISE_OP(Divide)
 
 template <typename T>
 void Sum(const CUDAContext& dev_ctx,
@@ -133,7 +133,7 @@ PT_REGISTER_KERNEL(scale,
 PT_REGISTER_KERNEL(add,
                    CUDA,
                    ALL_LAYOUT,
-                   pten::ElementwiseAdd,
+                   pten::Add,
                    float,
                    double,
                    int,
@@ -144,7 +144,7 @@ PT_REGISTER_KERNEL(add,
 PT_REGISTER_KERNEL(subtract,
                    CUDA,
                    ALL_LAYOUT,
-                   pten::ElementwiseSub,
+                   pten::Subtract,
                    float,
                    double,
                    int,
@@ -155,7 +155,7 @@ PT_REGISTER_KERNEL(subtract,
 PT_REGISTER_KERNEL(divide,
                    CUDA,
                    ALL_LAYOUT,
-                   pten::ElementwiseDiv,
+                   pten::Divide,
                    float,
                    double,
                    int,
@@ -166,7 +166,7 @@ PT_REGISTER_KERNEL(divide,
 PT_REGISTER_KERNEL(multiply,
                    CUDA,
                    ALL_LAYOUT,
-                   pten::ElementwiseMul,
+                   pten::Multiply,
                    float,
                    double,
                    int,
diff --git a/paddle/pten/kernels/cuda/math.h b/paddle/pten/kernels/cuda/math.h
index 0ac55f1f879..37f61c51249 100644
--- a/paddle/pten/kernels/cuda/math.h
+++ b/paddle/pten/kernels/cuda/math.h
@@ -49,32 +49,32 @@ void Scale(const CUDAContext& dev_ctx,
            DenseTensor* out);
 
 template <typename T>
-void ElementwiseAdd(const CUDAContext& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    int axis,
-                    DenseTensor* out);
+void Add(const CUDAContext& dev_ctx,
+         const DenseTensor& x,
+         const DenseTensor& y,
+         int axis,
+         DenseTensor* out);
 
 template <typename T>
-void ElementwiseSub(const CUDAContext& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    int axis,
-                    DenseTensor* out);
+void Subtract(const CUDAContext& dev_ctx,
+              const DenseTensor& x,
+              const DenseTensor& y,
+              int axis,
+              DenseTensor* out);
 
 template <typename T>
-void ElementwiseDiv(const CUDAContext& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    int axis,
-                    DenseTensor* out);
+void Divide(const CUDAContext& dev_ctx,
+            const DenseTensor& x,
+            const DenseTensor& y,
+            int axis,
+            DenseTensor* out);
 
 template <typename T>
-void ElementwiseMul(const CUDAContext& dev_ctx,
-                    const DenseTensor& x,
-                    const DenseTensor& y,
-                    int axis,
-                    DenseTensor* out);
+void Multiply(const CUDAContext& dev_ctx,
+              const DenseTensor& x,
+              const DenseTensor& y,
+              int axis,
+              DenseTensor* out);
 
 template <typename T>
 void Sum(const CUDAContext& dev_ctx,
@@ -90,11 +90,11 @@ void Sum(const CUDAContext& dev_ctx,
 
 #define DEFINE_CUDA_ELEMENTWISE_OP(name)                               \
   template <typename T>                                                \
-  void Elementwise##name(const CUDAContext& dev_ctx,                   \
-                         const DenseTensor& x,                         \
-                         const DenseTensor& y,                         \
-                         int axis,                                     \
-                         DenseTensor* out) {                           \
+  void name(const CUDAContext& dev_ctx,                                \
+            const DenseTensor& x,                                      \
+            const DenseTensor& y,                                      \
+            int axis,                                                  \
+            DenseTensor* out) {                                        \
     std::vector<const DenseTensor*> inputs;                            \
     std::vector<DenseTensor*> outputs;                                 \
     inputs.emplace_back(&x);                                           \
diff --git a/paddle/pten/kernels/hybird/general/elementwise_functor.h b/paddle/pten/kernels/hybird/general/elementwise_functor.h
index 109f0907c6a..62b422f4ae4 100644
--- a/paddle/pten/kernels/hybird/general/elementwise_functor.h
+++ b/paddle/pten/kernels/hybird/general/elementwise_functor.h
@@ -72,7 +72,7 @@ struct InverseAddFunctor {
 
 // Subtract
 template <typename DevCtx, typename T, class Enable = void>
-struct SameDimsSubFunctor {
+struct SameDimsSubtractFunctor {
   void operator()(const DevCtx& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& y,
@@ -80,7 +80,7 @@ struct SameDimsSubFunctor {
 };
 
 template <typename DevCtx, typename T>
-struct SameDimsSubFunctor<
+struct SameDimsSubtractFunctor<
     DevCtx,
     T,
     typename std::enable_if<std::is_floating_point<T>::value>::type> {
@@ -93,7 +93,7 @@ struct SameDimsSubFunctor<
 };
 
 template <typename DevCtx, typename T>
-struct SameDimsSubFunctor<
+struct SameDimsSubtractFunctor<
     DevCtx,
     T,
     typename std::enable_if<!std::is_floating_point<T>::value>::type> {
@@ -106,17 +106,17 @@ struct SameDimsSubFunctor<
 };
 
 template <typename T>
-struct SubFunctor {
+struct SubtractFunctor {
   inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a - b; }
 };
 template <typename T>
-struct InverseSubFunctor {
+struct InverseSubtractFunctor {
   inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b - a; }
 };
 
 // Divide
 template <typename DevCtx, typename T, class Enable = void>
-struct SameDimsDivFunctor {
+struct SameDimsDivideFunctor {
   void operator()(const DevCtx& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& y,
@@ -124,7 +124,7 @@ struct SameDimsDivFunctor {
 };
 
 template <typename DevCtx, typename T>
-struct SameDimsDivFunctor<
+struct SameDimsDivideFunctor<
     DevCtx,
     T,
     typename std::enable_if<!std::is_floating_point<T>::value>::type> {
@@ -133,12 +133,13 @@ struct SameDimsDivFunctor<
                   const DenseTensor& y,
                   DenseTensor* z) {
     paddle::platform::errors::InvalidArgument(
-        "If use SameDimsDivFunctor, template args(T) must be floating point. ");
+        "If use SameDimsDivideFunctor, template args(T) must be floating "
+        "point. ");
   }
 };
 
 template <typename DevCtx, typename T>
-struct SameDimsDivFunctor<
+struct SameDimsDivideFunctor<
     DevCtx,
     T,
     typename std::enable_if<std::is_floating_point<T>::value>::type> {
@@ -155,13 +156,14 @@ struct SameDimsDivFunctor<
   "(floor) divide. Please check the input value."
 
 template <typename T, typename Enable = void>
-struct DivFunctor {
+struct DivideFunctor {
   inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; }
 };
 
 template <typename T>
-struct DivFunctor<T,
-                  typename std::enable_if<std::is_integral<T>::value>::type> {
+struct DivideFunctor<
+    T,
+    typename std::enable_if<std::is_integral<T>::value>::type> {
   inline HOSTDEVICE T operator()(const T& a, const T& b) const {
     // For int32/int64, need to check whether the divison is zero.
     PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
@@ -170,13 +172,13 @@ struct DivFunctor<T,
 };
 
 template <typename T, typename Enable = void>
-struct InverseDivFunctor {
+struct InverseDivideFunctor {
   inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b / a; }
 };
 
 // Multiply
 template <typename DevCtx, typename T, class Enable = void>
-struct SameDimsMulFunctor {
+struct SameDimsMultiplyFunctor {
   void operator()(const DevCtx& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& y,
@@ -184,7 +186,7 @@ struct SameDimsMulFunctor {
 };
 
 template <typename DevCtx, typename T>
-struct SameDimsMulFunctor<
+struct SameDimsMultiplyFunctor<
     DevCtx,
     T,
     typename std::enable_if<std::is_floating_point<T>::value>::type> {
@@ -197,7 +199,7 @@ struct SameDimsMulFunctor<
 };
 
 template <typename DevCtx, typename T>
-struct SameDimsMulFunctor<
+struct SameDimsMultiplyFunctor<
     DevCtx,
     T,
     typename std::enable_if<!std::is_floating_point<T>::value>::type> {
@@ -209,11 +211,11 @@ struct SameDimsMulFunctor<
   }
 };
 template <typename T>
-struct MulFunctor {
+struct MultiplyFunctor {
   inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; }
 };
 template <typename T>
-struct InverseMulFunctor {
+struct InverseMultiplyFunctor {
   inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b * a; }
 };
 
diff --git a/paddle/pten/tests/kernels/test_fill_dev_api.cc b/paddle/pten/tests/kernels/test_fill_dev_api.cc
index 90a16e3c468..9a8b1f94e73 100644
--- a/paddle/pten/tests/kernels/test_fill_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_fill_dev_api.cc
@@ -44,7 +44,7 @@ TEST(DEV_API, fill_any_like) {
   auto* dev_ctx = pool.Get(paddle::platform::CPUPlace());
 
   // 2. test API
-  auto out = pten::FillAnyLike<float>(
+  auto out = pten::FullLike<float>(
       *(static_cast<paddle::platform::CPUDeviceContext*>(dev_ctx)),
       dense_x,
       val);
-- 
GitLab