Unverified · Commit 594bd723 authored by: Y YuanRisheng, committed by: GitHub

[PHI]Standardise some C++ API (Part4) (#47702)

* standard api

* fix sparse bugs

* fix xpu bugs, test=kunlun

* remove hard code for custom unittest

* open ci, test=kunlun

* deal with conflict
Parent 28c56d77
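
The recurring pattern in this commit: for each operator whose C++ API is being standardized, the attribute-carrying kernel is renamed to a "Raw" variant (Relu6RawKernel, HardSwishRawKernel, SwishRawKernel, FMaxRawKernel, LessThanRawKernel, ...), and a new attribute-free kernel keeps the original name and simply forwards a fixed default to the raw one. The standalone sketch below only illustrates that naming/forwarding convention with plain std::vector math; it is not PaddlePaddle's actual kernel code, which takes a device context and DenseTensor arguments as shown in the diff that follows (e.g. Relu6Kernel calling Relu6RawKernel with threshold 6).

    // Standalone illustration of the Raw/default-wrapper convention (not phi code).
    #include <algorithm>
    #include <iostream>
    #include <vector>

    // Attribute-carrying ("raw") form, matching the legacy operator path.
    void Relu6RawKernel(const std::vector<float>& x, float threshold,
                        std::vector<float>* out) {
      out->resize(x.size());
      for (size_t i = 0; i < x.size(); ++i) {
        (*out)[i] = std::min(std::max(x[i], 0.0f), threshold);
      }
    }

    // Standardized form: no attribute; forwards the fixed default (6) to the raw kernel.
    void Relu6Kernel(const std::vector<float>& x, std::vector<float>* out) {
      Relu6RawKernel(x, 6.0f, out);
    }

    int main() {
      std::vector<float> x = {-1.0f, 3.0f, 9.0f};
      std::vector<float> out;
      Relu6Kernel(x, &out);
      for (float v : out) std::cout << v << ' ';  // prints: 0 3 6
      std::cout << '\n';
      return 0;
    }

The real wrappers added in this commit follow the same shape: the legacy operator path keeps its attributes via the *RawKernel, while the new standardized C++ API (and the updated yaml op definitions) call the attribute-free kernel.
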
...@@ -100,7 +100,7 @@ class CompareOp : public framework::OperatorWithKernel { ...@@ -100,7 +100,7 @@ class CompareOp : public framework::OperatorWithKernel {
char _##op_type##Comment::equation[]{_equation}; \ char _##op_type##Comment::equation[]{_equation}; \
DECLARE_INFER_SHAPE_FUNCTOR(op_type, \ DECLARE_INFER_SHAPE_FUNCTOR(op_type, \
op_type##_InferShapeFunctor, \ op_type##_InferShapeFunctor, \
PD_INFER_META(phi::CompareInferMeta)); \ PD_INFER_META(phi::CompareRawInferMeta)); \
REGISTER_OPERATOR( \ REGISTER_OPERATOR( \
op_type, \ op_type, \
::paddle::operators::CompareOp<_##op_type##Comment>, \ ::paddle::operators::CompareOp<_##op_type##Comment>, \
......
...@@ -400,7 +400,6 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker<T> ...@@ -400,7 +400,6 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker<T>
grad_op->SetInput("{{attr_name | to_pascal_case}}Tensor", this->Input("{{attr_name | to_pascal_case}}Tensor")); grad_op->SetInput("{{attr_name | to_pascal_case}}Tensor", this->Input("{{attr_name | to_pascal_case}}Tensor"));
{% endif %} {% endif %}
{% else %}{# maybe something wrong: backward op has more attrs than the forward one#} {% else %}{# maybe something wrong: backward op has more attrs than the forward one#}
grad_op->AddAttr<{{attr["typename"] | to_op_attr_type}}>({{attr_name}}, "({{attr["typename"] | to_op_attr_type}}), exceptional attr {{attr_name}}");
grad_op->SetAttr("{{attr_name}}", {{process_default_value(attr)}}); grad_op->SetAttr("{{attr_name}}", {{process_default_value(attr)}});
{% endif %} {% endif %}
{% endfor %} {% endfor %}
......
...@@ -841,7 +841,7 @@ static PyObject* tensor__gt__method(TensorObject* self, ...@@ -841,7 +841,7 @@ static PyObject* tensor__gt__method(TensorObject* self,
VLOG(6) << "Calling greater_than_ad_func in tensor__gt__method"; VLOG(6) << "Calling greater_than_ad_func in tensor__gt__method";
{ {
eager_gil_scoped_release guard; eager_gil_scoped_release guard;
ret = greater_than_ad_func(self_tensor, other_tensor, -1); ret = greater_than_ad_func(self_tensor, other_tensor);
} }
return ToPyObject(ret); return ToPyObject(ret);
...@@ -927,7 +927,7 @@ static PyObject* tensor__ge__method(TensorObject* self, ...@@ -927,7 +927,7 @@ static PyObject* tensor__ge__method(TensorObject* self,
VLOG(6) << "Calling greater_equal_ad_func in tensor__ge__method"; VLOG(6) << "Calling greater_equal_ad_func in tensor__ge__method";
{ {
eager_gil_scoped_release guard; eager_gil_scoped_release guard;
ret = greater_equal_ad_func(self_tensor, other_tensor, -1); ret = greater_equal_ad_func(self_tensor, other_tensor);
} }
return ToPyObject(ret); return ToPyObject(ret);
...@@ -1204,7 +1204,7 @@ static PyObject* tensor__lt__method(TensorObject* self, ...@@ -1204,7 +1204,7 @@ static PyObject* tensor__lt__method(TensorObject* self,
VLOG(6) << "Calling less_than_ad_func in tensor__lt__method"; VLOG(6) << "Calling less_than_ad_func in tensor__lt__method";
{ {
eager_gil_scoped_release guard; eager_gil_scoped_release guard;
ret = less_than_ad_func(self_tensor, other_tensor, -1); ret = less_than_ad_func(self_tensor, other_tensor);
} }
return ToPyObject(ret); return ToPyObject(ret);
...@@ -1290,7 +1290,7 @@ static PyObject* tensor__le__method(TensorObject* self, ...@@ -1290,7 +1290,7 @@ static PyObject* tensor__le__method(TensorObject* self,
VLOG(6) << "Calling less_equal_ad_func in tensor__le__method"; VLOG(6) << "Calling less_equal_ad_func in tensor__le__method";
{ {
eager_gil_scoped_release guard; eager_gil_scoped_release guard;
ret = less_equal_ad_func(self_tensor, other_tensor, -1); ret = less_equal_ad_func(self_tensor, other_tensor);
} }
return ToPyObject(ret); return ToPyObject(ret);
...@@ -1636,7 +1636,7 @@ static PyObject* tensor__ne__method(TensorObject* self, ...@@ -1636,7 +1636,7 @@ static PyObject* tensor__ne__method(TensorObject* self,
VLOG(6) << "Calling not_equal_ad_func in tensor__ne__method"; VLOG(6) << "Calling not_equal_ad_func in tensor__ne__method";
{ {
eager_gil_scoped_release guard; eager_gil_scoped_release guard;
ret = not_equal_ad_func(self_tensor, other_tensor, -1); ret = not_equal_ad_func(self_tensor, other_tensor);
} }
return ToPyObject(ret); return ToPyObject(ret);
...@@ -1722,7 +1722,7 @@ static PyObject* tensor__eq__method(TensorObject* self, ...@@ -1722,7 +1722,7 @@ static PyObject* tensor__eq__method(TensorObject* self,
VLOG(6) << "Calling equal_ad_func in tensor__eq__method"; VLOG(6) << "Calling equal_ad_func in tensor__eq__method";
{ {
eager_gil_scoped_release guard; eager_gil_scoped_release guard;
ret = equal_ad_func(self_tensor, other_tensor, -1); ret = equal_ad_func(self_tensor, other_tensor);
} }
return ToPyObject(ret); return ToPyObject(ret);
......
...@@ -67,7 +67,7 @@ ...@@ -67,7 +67,7 @@
func : addmm_grad func : addmm_grad
- backward_op : affine_grid_grad - backward_op : affine_grid_grad
forward : affine_grid (Tensor input, IntArray outputShape, bool use_cudnn=true, bool align_corners=true) -> Tensor(output) forward : affine_grid (Tensor input, IntArray outputShape, bool align_corners=true, bool use_cudnn=true) -> Tensor(output)
args : (Tensor output_grad, IntArray outputShape, bool use_cudnn=true, bool align_corners=true) args : (Tensor output_grad, IntArray outputShape, bool use_cudnn=true, bool align_corners=true)
output : Tensor(input_grad) output : Tensor(input_grad)
infer_meta : infer_meta :
...@@ -577,8 +577,8 @@ ...@@ -577,8 +577,8 @@
inplace : (out_grad -> x_grad) inplace : (out_grad -> x_grad)
- backward_op : fmax_grad - backward_op : fmax_grad
forward : fmax(Tensor x, Tensor y, int axis) -> Tensor(out) forward : fmax(Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad, int axis) args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1)
output : Tensor(x_grad), Tensor(y_grad) output : Tensor(x_grad), Tensor(y_grad)
infer_meta : infer_meta :
func : GeneralBinaryGradInferMeta func : GeneralBinaryGradInferMeta
...@@ -587,8 +587,8 @@ ...@@ -587,8 +587,8 @@
func : fmax_grad func : fmax_grad
- backward_op : fmin_grad - backward_op : fmin_grad
forward : fmin(Tensor x, Tensor y, int axis) -> Tensor(out) forward : fmin(Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad, int axis) args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1)
output : Tensor(x_grad), Tensor(y_grad) output : Tensor(x_grad), Tensor(y_grad)
infer_meta : infer_meta :
func : GeneralBinaryGradInferMeta func : GeneralBinaryGradInferMeta
...@@ -684,8 +684,8 @@ ...@@ -684,8 +684,8 @@
func : gumbel_softmax_grad func : gumbel_softmax_grad
- backward_op : hardswish_grad - backward_op : hardswish_grad
forward : hardswish (Tensor x, float threshold = 6.0, float scale = 6.0, float offset = 3.0) -> Tensor(out) forward : hardswish (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float threshold, float scale, float offset) args : (Tensor x, Tensor out_grad, float threshold = 6.0, float scale = 6.0, float offset = 3.0)
output : Tensor(x_grad) output : Tensor(x_grad)
infer_meta : infer_meta :
func : UnchangedInferMeta func : UnchangedInferMeta
...@@ -1418,8 +1418,8 @@ ...@@ -1418,8 +1418,8 @@
invoke : real_grad_impl(out_grad, x_grad) invoke : real_grad_impl(out_grad, x_grad)
- backward_op : relu6_grad - backward_op : relu6_grad
forward : relu6 (Tensor x, float threshold) -> Tensor(out) forward : relu6 (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad, float threshold) args : (Tensor out, Tensor out_grad, float threshold = 6)
output : Tensor(x_grad) output : Tensor(x_grad)
infer_meta : infer_meta :
func : UnchangedInferMeta func : UnchangedInferMeta
...@@ -1810,7 +1810,7 @@ ...@@ -1810,7 +1810,7 @@
optional: u_grad, vh_grad, s_grad optional: u_grad, vh_grad, s_grad
- backward_op : swish_grad - backward_op : swish_grad
forward : swish (Tensor x, float beta=1.0) -> Tensor(out) forward : swish (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float bete=1.0) args : (Tensor x, Tensor out_grad, float bete=1.0)
output : Tensor(x_grad) output : Tensor(x_grad)
infer_meta : infer_meta :
......
...@@ -97,7 +97,7 @@ ...@@ -97,7 +97,7 @@
backward : addmm_grad backward : addmm_grad
- op : affine_grid - op : affine_grid
args : (Tensor input, IntArray outputShape, bool use_cudnn=true, bool align_corners=true) args : (Tensor input, IntArray outputShape, bool align_corners=true, bool use_cudnn=true)
output : Tensor output : Tensor
infer_meta : infer_meta :
func : AffineGridInferMeta func : AffineGridInferMeta
...@@ -649,7 +649,7 @@ ...@@ -649,7 +649,7 @@
backend : place > x backend : place > x
- op : equal - op : equal
args : (Tensor x, Tensor y, int axis = -1) args : (Tensor x, Tensor y)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
func : CompareInferMeta func : CompareInferMeta
...@@ -751,7 +751,7 @@ ...@@ -751,7 +751,7 @@
func : floor_divide func : floor_divide
- op : fmax - op : fmax
args : (Tensor x, Tensor y, int axis) args : (Tensor x, Tensor y)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
param: [x, y] param: [x, y]
...@@ -761,7 +761,7 @@ ...@@ -761,7 +761,7 @@
backward : fmax_grad backward : fmax_grad
- op : fmin - op : fmin
args : (Tensor x, Tensor y, int axis) args : (Tensor x, Tensor y)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
param: [x, y] param: [x, y]
...@@ -898,7 +898,7 @@ ...@@ -898,7 +898,7 @@
func : generate_proposals_v2 func : generate_proposals_v2
- op : greater_equal - op : greater_equal
args : (Tensor x, Tensor y, int axis = -1) args : (Tensor x, Tensor y)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
func : CompareInferMeta func : CompareInferMeta
...@@ -906,7 +906,7 @@ ...@@ -906,7 +906,7 @@
func : greater_equal func : greater_equal
- op : greater_than - op : greater_than
args : (Tensor x, Tensor y, int axis = -1) args : (Tensor x, Tensor y)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
func : CompareInferMeta func : CompareInferMeta
...@@ -945,7 +945,7 @@ ...@@ -945,7 +945,7 @@
backward : gumbel_softmax_grad backward : gumbel_softmax_grad
- op : hardswish - op : hardswish
args : (Tensor x, float threshold = 6.0, float scale = 6.0, float offset = 3.0) args : (Tensor x)
output : Tensor output : Tensor
infer_meta : infer_meta :
func : UnchangedInferMeta func : UnchangedInferMeta
...@@ -1180,7 +1180,7 @@ ...@@ -1180,7 +1180,7 @@
backward : lerp_grad backward : lerp_grad
- op : less_equal - op : less_equal
args : (Tensor x, Tensor y, int axis = -1) args : (Tensor x, Tensor y)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
func : CompareInferMeta func : CompareInferMeta
...@@ -1188,7 +1188,7 @@ ...@@ -1188,7 +1188,7 @@
func : less_equal func : less_equal
- op : less_than - op : less_than
args : (Tensor x, Tensor y, int axis = -1) args : (Tensor x, Tensor y)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
func : CompareInferMeta func : CompareInferMeta
...@@ -1623,7 +1623,7 @@ ...@@ -1623,7 +1623,7 @@
backward : norm_grad backward : norm_grad
- op : not_equal - op : not_equal
args : (Tensor x, Tensor y, int axis = -1) args : (Tensor x, Tensor y)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
func : CompareInferMeta func : CompareInferMeta
...@@ -1820,7 +1820,7 @@ ...@@ -1820,7 +1820,7 @@
backward : real_grad backward : real_grad
- op : relu6 - op : relu6
args : (Tensor x, float threshold) args : (Tensor x)
output : Tensor output : Tensor
infer_meta : infer_meta :
func : UnchangedInferMeta func : UnchangedInferMeta
...@@ -2192,9 +2192,8 @@ ...@@ -2192,9 +2192,8 @@
func : svd func : svd
backward : svd_grad backward : svd_grad
# The python API paddle.nn.functional.swish has no `bete` argument, it may be removed later
- op : swish - op : swish
args : (Tensor x, float beta=1.0) args : (Tensor x)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
func : UnchangedInferMeta func : UnchangedInferMeta
......
...@@ -251,8 +251,8 @@ ...@@ -251,8 +251,8 @@
pow_csr_grad {sparse_csr, sparse_csr -> sparse_csr} pow_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_op : relu6_grad - backward_op : relu6_grad
forward : relu6(Tensor x, float threshold) -> Tensor(out) forward : relu6(Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad, float threshold) args : (Tensor out, Tensor out_grad, float threshold = 6)
output : Tensor(x_grad) output : Tensor(x_grad)
infer_meta : infer_meta :
func : UnchangedInferMeta func : UnchangedInferMeta
......
...@@ -213,7 +213,7 @@ ...@@ -213,7 +213,7 @@
backward : relu_grad backward : relu_grad
- op : relu6 - op : relu6
args : (Tensor x, float threshold) args : (Tensor x)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
func : UnchangedInferMeta func : UnchangedInferMeta
......
...@@ -328,10 +328,10 @@ void CholeskySolveInferMeta(const MetaTensor& x, ...@@ -328,10 +328,10 @@ void CholeskySolveInferMeta(const MetaTensor& x,
out->share_lod(x); out->share_lod(x);
} }
void CompareInferMeta(const MetaTensor& x, void CompareRawInferMeta(const MetaTensor& x,
const MetaTensor& y, const MetaTensor& y,
int axis, int axis,
MetaTensor* out) { MetaTensor* out) {
auto dim_x = x.dims(); auto dim_x = x.dims();
auto dim_y = y.dims(); auto dim_y = y.dims();
...@@ -358,6 +358,12 @@ void CompareInferMeta(const MetaTensor& x, ...@@ -358,6 +358,12 @@ void CompareInferMeta(const MetaTensor& x,
out->set_dtype(DataType::BOOL); out->set_dtype(DataType::BOOL);
} }
void CompareInferMeta(const MetaTensor& x,
const MetaTensor& y,
MetaTensor* out) {
CompareRawInferMeta(x, y, -1, out);
}
void CompareAllInferMeta(const MetaTensor& x, void CompareAllInferMeta(const MetaTensor& x,
const MetaTensor& y, const MetaTensor& y,
MetaTensor* out) { MetaTensor* out) {
......
...@@ -69,9 +69,13 @@ void CompareAllInferMeta(const MetaTensor& x, ...@@ -69,9 +69,13 @@ void CompareAllInferMeta(const MetaTensor& x,
void CompareInferMeta(const MetaTensor& x, void CompareInferMeta(const MetaTensor& x,
const MetaTensor& y, const MetaTensor& y,
int axis,
MetaTensor* out); MetaTensor* out);
void CompareRawInferMeta(const MetaTensor& x,
const MetaTensor& y,
int axis,
MetaTensor* out);
void ComplexInferMeta(const MetaTensor& x, void ComplexInferMeta(const MetaTensor& x,
const MetaTensor& y, const MetaTensor& y,
MetaTensor* out); MetaTensor* out);
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) {
HardSwishRawKernel<T, Context>(dev_ctx, x, 6, 6, 3, out);
}
template <typename T, typename Context>
void Relu6Kernel(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) {
Relu6RawKernel<T, Context>(dev_ctx, x, 6, out);
}
template <typename T, typename Context>
void SwishKernel(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) {
SwishRawKernel<T, Context>(dev_ctx, x, 1.0, out);
}
} // namespace phi
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
PD_REGISTER_KERNEL(
hard_swish, CPU, ALL_LAYOUT, phi::HardSwishKernel, float, double) {}
PD_REGISTER_KERNEL(relu6, CPU, ALL_LAYOUT, phi::Relu6Kernel, float, double) {}
PD_REGISTER_KERNEL(swish, CPU, ALL_LAYOUT, phi::SwishKernel, float, double) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(hard_swish,
GPU,
ALL_LAYOUT,
phi::HardSwishKernel,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(relu6,
GPU,
ALL_LAYOUT,
phi::Relu6Kernel,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(swish,
GPU,
ALL_LAYOUT,
phi::SwishKernel,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
#endif
#if defined PADDLE_WITH_XPU
PD_REGISTER_KERNEL(hard_swish, XPU, ALL_LAYOUT, phi::HardSwishKernel, float) {}
PD_REGISTER_KERNEL(relu6, XPU, ALL_LAYOUT, phi::Relu6Kernel, float) {}
PD_REGISTER_KERNEL(swish, XPU, ALL_LAYOUT, phi::SwishKernel, float) {}
#endif
#ifdef PADDLE_WITH_MKLDNN
PD_REGISTER_KERNEL(hard_swish,
OneDNN,
ONEDNN,
phi::HardSwishKernel,
float,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(
relu6, OneDNN, ONEDNN, phi::Relu6Kernel, float, phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(
swish, OneDNN, ONEDNN, phi::SwishKernel, float, phi::dtype::bfloat16) {}
#endif
...@@ -75,13 +75,13 @@ DECLARE_ACTIVATION_KERNEL(Negative) ...@@ -75,13 +75,13 @@ DECLARE_ACTIVATION_KERNEL(Negative)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, alpha) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, threshold) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Relu6, threshold) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Relu6Raw, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, threshold) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(HardShrink, threshold) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(HardShrink, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Swish, beta) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SwishRaw, beta)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Celu, alpha) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Celu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Logit, eps) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Logit, eps)
...@@ -90,14 +90,29 @@ DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(STanh, scale_a, scale_b) ...@@ -90,14 +90,29 @@ DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(STanh, scale_a, scale_b)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus, beta, threshold) DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus, beta, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(HardSigmoid, slope, offset) DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(HardSigmoid, slope, offset)
template <typename T, typename Context>
void HardSwishRawKernel(const Context& dev_ctx,
const DenseTensor& x,
float threshold,
float scale,
float offset,
DenseTensor* out);
template <typename T, typename Context> template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx, void HardSwishKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
float threshold,
float scale,
float offset,
DenseTensor* out); DenseTensor* out);
template <typename T, typename Context>
void Relu6Kernel(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out);
template <typename T, typename Context>
void SwishKernel(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out);
template <typename T, typename Context> template <typename T, typename Context>
void PowKernel(const Context& dev_ctx, void PowKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
......
...@@ -18,20 +18,25 @@ limitations under the License. */ ...@@ -18,20 +18,25 @@ limitations under the License. */
namespace phi { namespace phi {
#define DECALRE_COMPARE_KERNEL(compare_kernel) \ #define DECALRE_COMPARE_KERNEL(name) \
template <typename T, typename Context> \ template <typename T, typename Context> \
void compare_kernel(const Context& ctx, \ void name##RawKernel(const Context& ctx, \
const DenseTensor& x, \ const DenseTensor& x, \
const DenseTensor& y, \ const DenseTensor& y, \
int axis, \ int axis, \
DenseTensor* out); DenseTensor* out); \
template <typename T, typename Context> \
DECALRE_COMPARE_KERNEL(LessThanKernel) void name##Kernel(const Context& ctx, \
DECALRE_COMPARE_KERNEL(LessEqualKernel) const DenseTensor& x, \
DECALRE_COMPARE_KERNEL(GreaterThanKernel) const DenseTensor& y, \
DECALRE_COMPARE_KERNEL(GreaterEqualKernel) DenseTensor* out);
DECALRE_COMPARE_KERNEL(EqualKernel)
DECALRE_COMPARE_KERNEL(NotEqualKernel) DECALRE_COMPARE_KERNEL(LessThan)
DECALRE_COMPARE_KERNEL(LessEqual)
DECALRE_COMPARE_KERNEL(GreaterThan)
DECALRE_COMPARE_KERNEL(GreaterEqual)
DECALRE_COMPARE_KERNEL(Equal)
DECALRE_COMPARE_KERNEL(NotEqual)
#undef DECALRE_COMPARE_KERNEL #undef DECALRE_COMPARE_KERNEL
#define DECALRE_COMPARE_ALL_KERNEL(compare_all_kernel) \ #define DECALRE_COMPARE_ALL_KERNEL(compare_all_kernel) \
......
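
Since the updated DECALRE_COMPARE_KERNEL macro now declares two entry points per comparison op, one hand-expanded instance may help. Expanding DECALRE_COMPARE_KERNEL(LessThan) with the macro above yields roughly the following declarations (hand expansion for illustration only; DenseTensor is the phi tensor type used throughout this diff):

    // Hand expansion of DECALRE_COMPARE_KERNEL(LessThan) after this change.
    template <typename T, typename Context>
    void LessThanRawKernel(const Context& ctx,
                           const DenseTensor& x,
                           const DenseTensor& y,
                           int axis,
                           DenseTensor* out);

    template <typename T, typename Context>
    void LessThanKernel(const Context& ctx,
                        const DenseTensor& x,
                        const DenseTensor& y,
                        DenseTensor* out);
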
...@@ -96,12 +96,12 @@ DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha) ...@@ -96,12 +96,12 @@ DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
ThresholdedReluFunctor, ThresholdedReluFunctor,
threshold) threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Relu6, Relu6Functor, threshold) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Relu6Raw, Relu6Functor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishFunctor, threshold) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, ELUFunctor, alpha) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, ELUFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Swish, SwishFunctor, beta) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SwishRaw, SwishFunctor, beta)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Celu, CELUFunctor, alpha) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Celu, CELUFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(HardTanh, HardTanhFunctor, t_min, t_max) DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(HardTanh, HardTanhFunctor, t_min, t_max)
...@@ -113,12 +113,12 @@ DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid, ...@@ -113,12 +113,12 @@ DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
offset) offset)
template <typename T, typename Context> template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx, void HardSwishRawKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
float threshold, float threshold,
float scale, float scale,
float offset, float offset,
DenseTensor* out) { DenseTensor* out) {
funcs::HardSwishFunctor<T> functor; funcs::HardSwishFunctor<T> functor;
auto attrs = functor.GetAttrs(); auto attrs = functor.GetAttrs();
*(attrs[0].second) = threshold; *(attrs[0].second) = threshold;
...@@ -149,7 +149,7 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel) ...@@ -149,7 +149,7 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_tanh, HardTanhKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_tanh, HardTanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel) PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel) PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel) PD_REGISTER_ACTIVATION_KERNEL(relu6_raw, Relu6RawKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(softshrink, SoftShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(softshrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
...@@ -182,8 +182,8 @@ PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel) ...@@ -182,8 +182,8 @@ PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel)
PD_REGISTER_ACTIVATION_KERNEL(log2, Log2Kernel) PD_REGISTER_ACTIVATION_KERNEL(log2, Log2Kernel)
PD_REGISTER_ACTIVATION_KERNEL(log10, Log10Kernel) PD_REGISTER_ACTIVATION_KERNEL(log10, Log10Kernel)
PD_REGISTER_ACTIVATION_KERNEL(log1p, Log1pKernel) PD_REGISTER_ACTIVATION_KERNEL(log1p, Log1pKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_swish, HardSwishKernel) PD_REGISTER_ACTIVATION_KERNEL(swish_raw, SwishRawKernel)
PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_swish_raw, HardSwishRawKernel)
PD_REGISTER_ACTIVATION_KERNEL(round, RoundKernel) PD_REGISTER_ACTIVATION_KERNEL(round, RoundKernel)
PD_REGISTER_ACTIVATION_KERNEL(floor, FloorKernel) PD_REGISTER_ACTIVATION_KERNEL(floor, FloorKernel)
PD_REGISTER_ACTIVATION_KERNEL(ceil, CeilKernel) PD_REGISTER_ACTIVATION_KERNEL(ceil, CeilKernel)
......
...@@ -71,73 +71,6 @@ inline void CompareAllKernelImpl(const Context& ctx, ...@@ -71,73 +71,6 @@ inline void CompareAllKernelImpl(const Context& ctx,
} // namespace phi } // namespace phi
PD_REGISTER_KERNEL(less_than,
CPU,
ALL_LAYOUT,
phi::LessThanKernel,
bool,
int16_t,
int,
int64_t,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(less_equal,
CPU,
ALL_LAYOUT,
phi::LessEqualKernel,
bool,
int16_t,
int,
int64_t,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(greater_than,
CPU,
ALL_LAYOUT,
phi::GreaterThanKernel,
bool,
int16_t,
int,
int64_t,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(greater_equal,
CPU,
ALL_LAYOUT,
phi::GreaterEqualKernel,
bool,
int16_t,
int,
int64_t,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(equal,
CPU,
ALL_LAYOUT,
phi::EqualKernel,
bool,
int16_t,
int,
int64_t,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(not_equal,
CPU,
ALL_LAYOUT,
phi::NotEqualKernel,
bool,
int16_t,
int,
int64_t,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(equal_all, PD_REGISTER_KERNEL(equal_all,
CPU, CPU,
ALL_LAYOUT, ALL_LAYOUT,
...@@ -147,3 +80,33 @@ PD_REGISTER_KERNEL(equal_all, ...@@ -147,3 +80,33 @@ PD_REGISTER_KERNEL(equal_all,
int64_t, int64_t,
float, float,
double) {} double) {}
#define PD_REGISTER_COMPARE_KERNEL(name, func) \
PD_REGISTER_KERNEL(name, \
CPU, \
ALL_LAYOUT, \
phi::func##Kernel, \
bool, \
int16_t, \
int, \
int64_t, \
float, \
double, \
phi::dtype::float16) {} \
PD_REGISTER_KERNEL(name##_raw, \
CPU, \
ALL_LAYOUT, \
phi::func##RawKernel, \
bool, \
int16_t, \
int, \
int64_t, \
float, \
double, \
phi::dtype::float16) {}
PD_REGISTER_COMPARE_KERNEL(less_than, LessThan)
PD_REGISTER_COMPARE_KERNEL(less_equal, LessEqual)
PD_REGISTER_COMPARE_KERNEL(greater_than, GreaterThan)
PD_REGISTER_COMPARE_KERNEL(greater_equal, GreaterEqual)
PD_REGISTER_COMPARE_KERNEL(equal, Equal)
PD_REGISTER_COMPARE_KERNEL(not_equal, NotEqual)
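
Likewise, the new PD_REGISTER_COMPARE_KERNEL macro registers each comparison op twice per backend: once under the plain name (attribute-free kernel) and once under the _raw suffix (kernel that keeps the axis attribute). A hand expansion of the first entry, using the same dtype list as the macro above (illustration only):

    // Hand expansion of PD_REGISTER_COMPARE_KERNEL(less_than, LessThan) on CPU.
    PD_REGISTER_KERNEL(less_than, CPU, ALL_LAYOUT, phi::LessThanKernel,
                       bool, int16_t, int, int64_t, float, double,
                       phi::dtype::float16) {}
    PD_REGISTER_KERNEL(less_than_raw, CPU, ALL_LAYOUT, phi::LessThanRawKernel,
                       bool, int16_t, int, int64_t, float, double,
                       phi::dtype::float16) {}
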
...@@ -122,11 +122,23 @@ using complex128 = ::phi::dtype::complex<double>; ...@@ -122,11 +122,23 @@ using complex128 = ::phi::dtype::complex<double>;
// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 // NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16
// using bfloat16 = ::phi::dtype::bfloat16; // using bfloat16 = ::phi::dtype::bfloat16;
PD_REGISTER_KERNEL( PD_REGISTER_KERNEL(fmax_raw,
fmax, CPU, ALL_LAYOUT, phi::FMaxKernel, float, double, int, int64_t) {} CPU,
ALL_LAYOUT,
phi::FMaxRawKernel,
float,
double,
int,
int64_t) {}
PD_REGISTER_KERNEL( PD_REGISTER_KERNEL(fmin_raw,
fmin, CPU, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {} CPU,
ALL_LAYOUT,
phi::FMinRawKernel,
float,
double,
int,
int64_t) {}
PD_REGISTER_KERNEL(maximum_raw, PD_REGISTER_KERNEL(maximum_raw,
CPU, CPU,
......
...@@ -110,10 +110,32 @@ void SubtractKernel(const Context& dev_ctx, ...@@ -110,10 +110,32 @@ void SubtractKernel(const Context& dev_ctx,
SubtractRawKernel<T>(dev_ctx, x, y, axis, out); SubtractRawKernel<T>(dev_ctx, x, y, axis, out);
} }
template <typename T, typename Context>
void FMaxKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
FMaxRawKernel<T, Context>(dev_ctx, x, y, -1, out);
}
template <typename T, typename Context>
void FMinKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
FMinRawKernel<T, Context>(dev_ctx, x, y, -1, out);
}
} // namespace phi } // namespace phi
using complex64 = ::phi::dtype::complex<float>; using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>; using complex128 = ::phi::dtype::complex<double>;
PD_REGISTER_KERNEL(
fmax, CPU, ALL_LAYOUT, phi::FMaxKernel, float, double, int, int64_t) {}
PD_REGISTER_KERNEL(
fmin, CPU, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {}
PD_REGISTER_KERNEL(maximum, PD_REGISTER_KERNEL(maximum,
CPU, CPU,
ALL_LAYOUT, ALL_LAYOUT,
...@@ -210,6 +232,26 @@ PD_REGISTER_KERNEL(divide, ...@@ -210,6 +232,26 @@ PD_REGISTER_KERNEL(divide,
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(fmax,
KPS,
ALL_LAYOUT,
phi::FMaxKernel,
float,
double,
int,
phi::dtype::float16,
int64_t) {}
PD_REGISTER_KERNEL(fmin,
KPS,
ALL_LAYOUT,
phi::FMinKernel,
float,
double,
int,
phi::dtype::float16,
int64_t) {}
PD_REGISTER_KERNEL(maximum, PD_REGISTER_KERNEL(maximum,
KPS, KPS,
ALL_LAYOUT, ALL_LAYOUT,
......
...@@ -19,18 +19,30 @@ ...@@ -19,18 +19,30 @@
namespace phi { namespace phi {
template <typename T, typename Context>
void FMaxRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context> template <typename T, typename Context>
void FMaxKernel(const Context& dev_ctx, void FMaxKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
int axis,
DenseTensor* out); DenseTensor* out);
template <typename T, typename Context>
void FMinRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context> template <typename T, typename Context>
void FMinKernel(const Context& dev_ctx, void FMinKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
int axis,
DenseTensor* out); DenseTensor* out);
template <typename T, typename Context> template <typename T, typename Context>
......
...@@ -112,13 +112,13 @@ DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, CudaLeakyReluFunctor, alpha) ...@@ -112,13 +112,13 @@ DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, CudaLeakyReluFunctor, alpha)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
CudaThresholdedReluFunctor, CudaThresholdedReluFunctor,
threshold) threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Relu6, CudaRelu6Functor, threshold) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Relu6Raw, CudaRelu6Functor, threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink,
CudaHardShrinkFunctor, CudaHardShrinkFunctor,
threshold) threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, CudaSoftShrinkFunctor, lambda) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, CudaSoftShrinkFunctor, lambda)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, CudaELUFunctor, alpha) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, CudaELUFunctor, alpha)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Swish, CudaSwishFunctor, beta) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(SwishRaw, CudaSwishFunctor, beta)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, CudaMishFunctor, threshold) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, CudaMishFunctor, threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Celu, CudaCELUFunctor, alpha) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Celu, CudaCELUFunctor, alpha)
...@@ -138,12 +138,12 @@ DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid, ...@@ -138,12 +138,12 @@ DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Selu, CudaSeluFunctor, scale, alpha) DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Selu, CudaSeluFunctor, scale, alpha)
template <typename T, typename Context> template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx, void HardSwishRawKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
float threshold, float threshold,
float scale, float scale,
float offset, float offset,
DenseTensor* out) { DenseTensor* out) {
funcs::CudaHardSwishFunctor<T> functor; funcs::CudaHardSwishFunctor<T> functor;
auto attrs = functor.GetAttrs(); auto attrs = functor.GetAttrs();
*(attrs[0].second) = threshold; *(attrs[0].second) = threshold;
...@@ -198,7 +198,7 @@ PD_REGISTER_ACTIVATION_KERNEL(atanh, AtanhKernel) ...@@ -198,7 +198,7 @@ PD_REGISTER_ACTIVATION_KERNEL(atanh, AtanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel) PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_tanh, HardTanhKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_tanh, HardTanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel) PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel) PD_REGISTER_ACTIVATION_KERNEL(relu6_raw, Relu6RawKernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel) PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel) PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
PD_REGISTER_ACTIVATION_KERNEL(stanh, StanhKernel) PD_REGISTER_ACTIVATION_KERNEL(stanh, StanhKernel)
...@@ -254,8 +254,8 @@ PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel) ...@@ -254,8 +254,8 @@ PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel)
PD_REGISTER_ACTIVATION_KERNEL(log2, Log2Kernel) PD_REGISTER_ACTIVATION_KERNEL(log2, Log2Kernel)
PD_REGISTER_ACTIVATION_KERNEL(log10, Log10Kernel) PD_REGISTER_ACTIVATION_KERNEL(log10, Log10Kernel)
PD_REGISTER_ACTIVATION_KERNEL(log1p, Log1pKernel) PD_REGISTER_ACTIVATION_KERNEL(log1p, Log1pKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_swish, HardSwishKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_swish_raw, HardSwishRawKernel)
PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel) PD_REGISTER_ACTIVATION_KERNEL(swish_raw, SwishRawKernel)
PD_REGISTER_ACTIVATION_KERNEL(round, RoundKernel) PD_REGISTER_ACTIVATION_KERNEL(round, RoundKernel)
PD_REGISTER_ACTIVATION_KERNEL(floor, FloorKernel) PD_REGISTER_ACTIVATION_KERNEL(floor, FloorKernel)
PD_REGISTER_ACTIVATION_KERNEL(ceil, CeilKernel) PD_REGISTER_ACTIVATION_KERNEL(ceil, CeilKernel)
......
...@@ -36,33 +36,38 @@ inline void CompareAllKernelImpl(const Context& ctx, ...@@ -36,33 +36,38 @@ inline void CompareAllKernelImpl(const Context& ctx,
const DenseTensor& y, const DenseTensor& y,
DenseTensor* out); DenseTensor* out);
#define DEFINE_COMPARE_KERNEL(compare_kernel, functor, inverse_functor) \ #define DEFINE_COMPARE_KERNEL(name, functor, inverse_functor) \
template <typename T, typename Context> \ template <typename T, typename Context> \
void compare_kernel(const Context& ctx, \ void name##RawKernel(const Context& ctx, \
const DenseTensor& x, \ const DenseTensor& x, \
const DenseTensor& y, \ const DenseTensor& y, \
int axis, \ int axis, \
DenseTensor* out) { \ DenseTensor* out) { \
CompareKernelImpl<T, Context, functor<T>, inverse_functor<T>>( \ CompareKernelImpl<T, Context, functor<T>, inverse_functor<T>>( \
ctx, x, y, axis, out); \ ctx, x, y, axis, out); \
} \
template <typename T, typename Context> \
void name##Kernel(const Context& ctx, \
const DenseTensor& x, \
const DenseTensor& y, \
DenseTensor* out) { \
name##RawKernel<T, Context>(ctx, x, y, -1, out); \
} }
DEFINE_COMPARE_KERNEL(LessThanKernel, DEFINE_COMPARE_KERNEL(LessThan,
funcs::LessThanFunctor, funcs::LessThanFunctor,
funcs::GreaterThanFunctor) funcs::GreaterThanFunctor)
DEFINE_COMPARE_KERNEL(LessEqualKernel, DEFINE_COMPARE_KERNEL(LessEqual,
funcs::LessEqualFunctor, funcs::LessEqualFunctor,
funcs::GreaterEqualFunctor) funcs::GreaterEqualFunctor)
DEFINE_COMPARE_KERNEL(GreaterThanKernel, DEFINE_COMPARE_KERNEL(GreaterThan,
funcs::GreaterThanFunctor, funcs::GreaterThanFunctor,
funcs::LessThanFunctor) funcs::LessThanFunctor)
DEFINE_COMPARE_KERNEL(GreaterEqualKernel, DEFINE_COMPARE_KERNEL(GreaterEqual,
funcs::GreaterEqualFunctor, funcs::GreaterEqualFunctor,
funcs::LessEqualFunctor) funcs::LessEqualFunctor)
DEFINE_COMPARE_KERNEL(EqualKernel, funcs::EqualFunctor, funcs::EqualFunctor) DEFINE_COMPARE_KERNEL(Equal, funcs::EqualFunctor, funcs::EqualFunctor)
DEFINE_COMPARE_KERNEL(NotEqualKernel, DEFINE_COMPARE_KERNEL(NotEqual, funcs::NotEqualFunctor, funcs::NotEqualFunctor)
funcs::NotEqualFunctor,
funcs::NotEqualFunctor)
#undef DEFINE_COMPARE_KERNEL #undef DEFINE_COMPARE_KERNEL
#define DEFINE_COMPARE_ALL_KERNEL(compare_all_kernel, functor) \ #define DEFINE_COMPARE_ALL_KERNEL(compare_all_kernel, functor) \
......
...@@ -67,22 +67,22 @@ namespace phi { ...@@ -67,22 +67,22 @@ namespace phi {
} }
template <typename T, typename Context> template <typename T, typename Context>
void FMaxKernel(const Context& dev_ctx, void FMaxRawKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
int axis, int axis,
DenseTensor* out) { DenseTensor* out) {
dev_ctx.template Alloc<T>(out); dev_ctx.template Alloc<T>(out);
funcs::ElementwiseCompute<funcs::FMaxFunctor<T>, T, T>( funcs::ElementwiseCompute<funcs::FMaxFunctor<T>, T, T>(
dev_ctx, x, y, axis, funcs::FMaxFunctor<T>(), out); dev_ctx, x, y, axis, funcs::FMaxFunctor<T>(), out);
} }
template <typename T, typename Context> template <typename T, typename Context>
void FMinKernel(const Context& dev_ctx, void FMinRawKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
int axis, int axis,
DenseTensor* out) { DenseTensor* out) {
dev_ctx.template Alloc<T>(out); dev_ctx.template Alloc<T>(out);
funcs::ElementwiseCompute<funcs::FMinFunctor<T>, T, T>( funcs::ElementwiseCompute<funcs::FMinFunctor<T>, T, T>(
dev_ctx, x, y, axis, funcs::FMinFunctor<T>(), out); dev_ctx, x, y, axis, funcs::FMinFunctor<T>(), out);
......
...@@ -103,79 +103,20 @@ PD_REGISTER_KERNEL( ...@@ -103,79 +103,20 @@ PD_REGISTER_KERNEL(
greater_equal, KPS, ALL_LAYOUT, phi::GreaterEqualKernel, int) {} greater_equal, KPS, ALL_LAYOUT, phi::GreaterEqualKernel, int) {}
PD_REGISTER_KERNEL(equal, KPS, ALL_LAYOUT, phi::EqualKernel, int) {} PD_REGISTER_KERNEL(equal, KPS, ALL_LAYOUT, phi::EqualKernel, int) {}
PD_REGISTER_KERNEL(not_equal, KPS, ALL_LAYOUT, phi::NotEqualKernel, int) {} PD_REGISTER_KERNEL(not_equal, KPS, ALL_LAYOUT, phi::NotEqualKernel, int) {}
PD_REGISTER_KERNEL(
less_than_raw, KPS, ALL_LAYOUT, phi::LessThanRawKernel, int) {}
PD_REGISTER_KERNEL(
less_equal_raw, KPS, ALL_LAYOUT, phi::LessEqualRawKernel, int) {}
PD_REGISTER_KERNEL(
greater_than_raw, KPS, ALL_LAYOUT, phi::GreaterThanRawKernel, int) {}
PD_REGISTER_KERNEL(
greater_equal_raw, KPS, ALL_LAYOUT, phi::GreaterEqualRawKernel, int) {}
PD_REGISTER_KERNEL(equal_raw, KPS, ALL_LAYOUT, phi::EqualRawKernel, int) {}
PD_REGISTER_KERNEL(
not_equal_raw, KPS, ALL_LAYOUT, phi::NotEqualRawKernel, int) {}
#else #else
PD_REGISTER_KERNEL(less_than,
KPS,
ALL_LAYOUT,
phi::LessThanKernel,
bool,
int16_t,
int,
int64_t,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(less_equal,
KPS,
ALL_LAYOUT,
phi::LessEqualKernel,
bool,
int16_t,
int,
int64_t,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(greater_than,
KPS,
ALL_LAYOUT,
phi::GreaterThanKernel,
bool,
int16_t,
int,
int64_t,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(greater_equal,
KPS,
ALL_LAYOUT,
phi::GreaterEqualKernel,
bool,
int16_t,
int,
int64_t,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(equal,
KPS,
ALL_LAYOUT,
phi::EqualKernel,
bool,
int16_t,
int,
int64_t,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(not_equal,
KPS,
ALL_LAYOUT,
phi::NotEqualKernel,
bool,
int16_t,
int,
int64_t,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(equal_all, PD_REGISTER_KERNEL(equal_all,
KPS, KPS,
...@@ -186,4 +127,38 @@ PD_REGISTER_KERNEL(equal_all, ...@@ -186,4 +127,38 @@ PD_REGISTER_KERNEL(equal_all,
int64_t, int64_t,
float, float,
double) {} double) {}
#define PD_REGISTER_COMPARE_KERNEL(name, func) \
PD_REGISTER_KERNEL(name, \
KPS, \
ALL_LAYOUT, \
phi::func##Kernel, \
bool, \
int16_t, \
int, \
int64_t, \
float, \
double, \
phi::dtype::float16, \
phi::dtype::bfloat16) {} \
PD_REGISTER_KERNEL(name##_raw, \
KPS, \
ALL_LAYOUT, \
phi::func##RawKernel, \
bool, \
int16_t, \
int, \
int64_t, \
float, \
double, \
phi::dtype::float16, \
phi::dtype::bfloat16) {}
PD_REGISTER_COMPARE_KERNEL(less_than, LessThan)
PD_REGISTER_COMPARE_KERNEL(less_equal, LessEqual)
PD_REGISTER_COMPARE_KERNEL(greater_than, GreaterThan)
PD_REGISTER_COMPARE_KERNEL(greater_equal, GreaterEqual)
PD_REGISTER_COMPARE_KERNEL(equal, Equal)
PD_REGISTER_COMPARE_KERNEL(not_equal, NotEqual)
#endif #endif
...@@ -93,20 +93,20 @@ using bfloat16 = phi::dtype::bfloat16; ...@@ -93,20 +93,20 @@ using bfloat16 = phi::dtype::bfloat16;
using complex64 = ::phi::dtype::complex<float>; using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>; using complex128 = ::phi::dtype::complex<double>;
PD_REGISTER_KERNEL(fmax, PD_REGISTER_KERNEL(fmax_raw,
KPS, KPS,
ALL_LAYOUT, ALL_LAYOUT,
phi::FMaxKernel, phi::FMaxRawKernel,
float, float,
double, double,
int, int,
float16, float16,
int64_t) {} int64_t) {}
PD_REGISTER_KERNEL(fmin, PD_REGISTER_KERNEL(fmin_raw,
KPS, KPS,
ALL_LAYOUT, ALL_LAYOUT,
phi::FMinKernel, phi::FMinRawKernel,
float, float,
double, double,
int, int,
......
...@@ -154,15 +154,15 @@ DEFINE_ONEDNN_ACTIVATION_KERNEL(Round, RoundOneDNNFunctor) ...@@ -154,15 +154,15 @@ DEFINE_ONEDNN_ACTIVATION_KERNEL(Round, RoundOneDNNFunctor)
DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Elu, EluOneDNNFunctor, alpha) DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Elu, EluOneDNNFunctor, alpha)
DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, ReluOneDNNFunctor, alpha) DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, ReluOneDNNFunctor, alpha)
DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishOneDNNFunctor, threshold) DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishOneDNNFunctor, threshold)
DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Swish, SwishOneDNNFunctor, beta) DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(SwishRaw, SwishOneDNNFunctor, beta)
template <typename T, typename Context> template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx, void HardSwishRawKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
float threshold, float threshold,
float scale, float scale,
float offset, float offset,
DenseTensor* out) { DenseTensor* out) {
HardSwishOneDNNFunctor<T> functor; HardSwishOneDNNFunctor<T> functor;
functor(dev_ctx, x, threshold, 0, out); functor(dev_ctx, x, threshold, 0, out);
} }
...@@ -182,10 +182,10 @@ void GeluKernel(const Context& dev_ctx, ...@@ -182,10 +182,10 @@ void GeluKernel(const Context& dev_ctx,
} }
template <typename T, typename Context> template <typename T, typename Context>
void Relu6Kernel(const Context& dev_ctx, void Relu6RawKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
float threshold, float threshold,
DenseTensor* out) { DenseTensor* out) {
Relu6OneDNNFunctor<T> functor; Relu6OneDNNFunctor<T> functor;
functor(dev_ctx, x, 0, threshold, out); functor(dev_ctx, x, 0, threshold, out);
} }
...@@ -202,12 +202,12 @@ PD_REGISTER_ACTIVATION_KERNEL(abs, AbsKernel) ...@@ -202,12 +202,12 @@ PD_REGISTER_ACTIVATION_KERNEL(abs, AbsKernel)
PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel) PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
PD_REGISTER_ACTIVATION_KERNEL(exp, ExpKernel) PD_REGISTER_ACTIVATION_KERNEL(exp, ExpKernel)
PD_REGISTER_ACTIVATION_KERNEL(gelu, GeluKernel) PD_REGISTER_ACTIVATION_KERNEL(gelu, GeluKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_swish, HardSwishKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_swish_raw, HardSwishRawKernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel) PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel) PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu, ReluKernel) PD_REGISTER_ACTIVATION_KERNEL(relu, ReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel) PD_REGISTER_ACTIVATION_KERNEL(relu6_raw, Relu6RawKernel)
PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel) PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL(sqrt, SqrtKernel) PD_REGISTER_ACTIVATION_KERNEL(sqrt, SqrtKernel)
PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel) PD_REGISTER_ACTIVATION_KERNEL(swish_raw, SwishRawKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel) PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
...@@ -95,6 +95,7 @@ PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(pow, Pow) ...@@ -95,6 +95,7 @@ PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(pow, Pow)
PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(scale, Scale) PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(scale, Scale)
PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(expm1, Expm1) PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(expm1, Expm1)
PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(relu6, Relu6) PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(relu6, Relu6)
PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(relu6_raw, Relu6Raw)
PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(leaky_relu, LeakyRelu) PD_REGISTER_SPARSE_UNARY_CPU_KERNEL(leaky_relu, LeakyRelu)
PD_REGISTER_KERNEL(divide_scalar_coo, PD_REGISTER_KERNEL(divide_scalar_coo,
......
...@@ -99,6 +99,7 @@ PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(abs, Abs) ...@@ -99,6 +99,7 @@ PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(abs, Abs)
PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(pow, Pow) PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(pow, Pow)
PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(scale, Scale) PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(scale, Scale)
PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(expm1, Expm1) PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(expm1, Expm1)
PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(relu6_raw, Relu6Raw)
PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(relu6, Relu6) PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(relu6, Relu6)
PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(leaky_relu, LeakyRelu) PD_REGISTER_SPARSE_UNARY_GPU_KERNEL(leaky_relu, LeakyRelu)
......
...@@ -89,9 +89,23 @@ DEFINE_SPARSE_UNARY_KERNEL(Relu) ...@@ -89,9 +89,23 @@ DEFINE_SPARSE_UNARY_KERNEL(Relu)
DEFINE_SPARSE_UNARY_KERNEL(Abs) DEFINE_SPARSE_UNARY_KERNEL(Abs)
DEFINE_SPARSE_UNARY_KERNEL(Expm1) DEFINE_SPARSE_UNARY_KERNEL(Expm1)
DEFINE_SPARSE_UNARY_KERNEL_WITH_ONE_ATTR(Pow, factor) DEFINE_SPARSE_UNARY_KERNEL_WITH_ONE_ATTR(Pow, factor)
DEFINE_SPARSE_UNARY_KERNEL_WITH_ONE_ATTR(Relu6, threshold) DEFINE_SPARSE_UNARY_KERNEL_WITH_ONE_ATTR(Relu6Raw, threshold)
DEFINE_SPARSE_UNARY_KERNEL_WITH_ONE_ATTR(LeakyRelu, alpha) DEFINE_SPARSE_UNARY_KERNEL_WITH_ONE_ATTR(LeakyRelu, alpha)
template <typename T, typename Context>
void Relu6CooKernel(const Context& dev_ctx,
const SparseCooTensor& x,
SparseCooTensor* out) {
Relu6RawCooKernel<T, Context>(dev_ctx, x, 6, out);
}
template <typename T, typename Context>
void Relu6CsrKernel(const Context& dev_ctx,
const SparseCsrTensor& x,
SparseCsrTensor* out) {
Relu6RawCsrKernel<T, Context>(dev_ctx, x, 6, out);
}
template <typename T, typename Context> template <typename T, typename Context>
void ScaleCooKernel(const Context& dev_ctx, void ScaleCooKernel(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
......
...@@ -356,10 +356,10 @@ struct XPUMishFunctor : public funcs::BaseActivationFunctor<T> { ...@@ -356,10 +356,10 @@ struct XPUMishFunctor : public funcs::BaseActivationFunctor<T> {
}; };
template <typename T, typename Context> template <typename T, typename Context>
void SwishKernel(const Context& dev_ctx, void SwishRawKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
float beta, float beta,
DenseTensor* out) { DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type; using XPUType = typename XPUTypeTrait<T>::Type;
dev_ctx.template Alloc<T>(out); dev_ctx.template Alloc<T>(out);
int r = xpu::swish(dev_ctx.x_context(), int r = xpu::swish(dev_ctx.x_context(),
...@@ -415,7 +415,9 @@ DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, XPUMishFunctor, threshold) ...@@ -415,7 +415,9 @@ DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, XPUMishFunctor, threshold)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu,
XPULeakyReluFunctor, XPULeakyReluFunctor,
alpha) alpha)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Relu6, XPURelu6Functor, threshold) DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Relu6Raw,
XPURelu6Functor,
threshold)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus, DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus,
XPUSoftplusFunctor, XPUSoftplusFunctor,
...@@ -423,12 +425,12 @@ DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus, ...@@ -423,12 +425,12 @@ DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus,
threshold) threshold)
template <typename T, typename Context> template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx, void HardSwishRawKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
float threshold, float threshold,
float scale, float scale,
float offset, float offset,
DenseTensor* out) { DenseTensor* out) {
XPUHardSwishFunctor<T> functor; XPUHardSwishFunctor<T> functor;
auto attrs = functor.GetAttrs(); auto attrs = functor.GetAttrs();
*(attrs[0].second) = threshold; *(attrs[0].second) = threshold;
...@@ -452,13 +454,13 @@ PD_REGISTER_KERNEL( ...@@ -452,13 +454,13 @@ PD_REGISTER_KERNEL(
PD_REGISTER_ACTIVATION_KERNEL(exp, ExpKernel) // no grad PD_REGISTER_ACTIVATION_KERNEL(exp, ExpKernel) // no grad
PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel) PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel) PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_swish, HardSwishKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_swish_raw, HardSwishRawKernel)
PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel) PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
PD_REGISTER_ACTIVATION_KERNEL(pow, PowKernel) PD_REGISTER_ACTIVATION_KERNEL(pow, PowKernel)
PD_REGISTER_ACTIVATION_KERNEL(reciprocal, ReciprocalKernel) PD_REGISTER_ACTIVATION_KERNEL(reciprocal, ReciprocalKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel) PD_REGISTER_ACTIVATION_KERNEL(relu6_raw, Relu6RawKernel)
PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel) PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL(sqrt, SqrtKernel) PD_REGISTER_ACTIVATION_KERNEL(sqrt, SqrtKernel)
PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel) PD_REGISTER_ACTIVATION_KERNEL(swish_raw, SwishRawKernel)
PD_REGISTER_ACTIVATION_KERNEL(softplus, SoftplusKernel) PD_REGISTER_ACTIVATION_KERNEL(softplus, SoftplusKernel)
PD_REGISTER_ACTIVATION_KERNEL(square, SquareKernel) PD_REGISTER_ACTIVATION_KERNEL(square, SquareKernel)
...@@ -52,48 +52,59 @@ void XPUCompareKernelImpl(const Context& dev_ctx, ...@@ -52,48 +52,59 @@ void XPUCompareKernelImpl(const Context& dev_ctx,
PADDLE_ENFORCE_XDNN_SUCCESS(ret, "compare op"); PADDLE_ENFORCE_XDNN_SUCCESS(ret, "compare op");
} }
#define DEFINE_XPU_COMPARE_KERNEL(compare_kernel, functor) \ #define DEFINE_XPU_COMPARE_KERNEL(name, functor) \
template <typename T, typename Context> \ template <typename T, typename Context> \
void compare_kernel(const Context& dev_ctx, \ void name##RawKernel(const Context& dev_ctx, \
const DenseTensor& x, \ const DenseTensor& x, \
const DenseTensor& y, \ const DenseTensor& y, \
int axis, \ int axis, \
DenseTensor* out) { \ DenseTensor* out) { \
using XPUType = typename XPUTypeTrait<T>::Type; \ using XPUType = typename XPUTypeTrait<T>::Type; \
XPUCompareKernelImpl<T, XPUType, Context>(dev_ctx, x, y, out, functor); \ XPUCompareKernelImpl<T, XPUType, Context>(dev_ctx, x, y, out, functor); \
} \
template <typename T, typename Context> \
void name##Kernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& y, \
DenseTensor* out) { \
name##RawKernel<T, Context>(dev_ctx, x, y, -1, out); \
} }
DEFINE_XPU_COMPARE_KERNEL(EqualKernel, xpu::broadcast_equal<XPUType>) DEFINE_XPU_COMPARE_KERNEL(Equal, xpu::broadcast_equal<XPUType>)
DEFINE_XPU_COMPARE_KERNEL(NotEqualKernel, xpu::broadcast_not_equal<XPUType>) DEFINE_XPU_COMPARE_KERNEL(NotEqual, xpu::broadcast_not_equal<XPUType>)
DEFINE_XPU_COMPARE_KERNEL(LessThanKernel, xpu::broadcast_less_than<XPUType>) DEFINE_XPU_COMPARE_KERNEL(LessThan, xpu::broadcast_less_than<XPUType>)
DEFINE_XPU_COMPARE_KERNEL(LessEqualKernel, xpu::broadcast_less_equal<XPUType>) DEFINE_XPU_COMPARE_KERNEL(LessEqual, xpu::broadcast_less_equal<XPUType>)
DEFINE_XPU_COMPARE_KERNEL(GreaterThanKernel, DEFINE_XPU_COMPARE_KERNEL(GreaterThan, xpu::broadcast_greater_than<XPUType>)
xpu::broadcast_greater_than<XPUType>) DEFINE_XPU_COMPARE_KERNEL(GreaterEqual, xpu::broadcast_greater_equal<XPUType>)
DEFINE_XPU_COMPARE_KERNEL(GreaterEqualKernel,
xpu::broadcast_greater_equal<XPUType>)
#undef DEFINE_XPU_COMPARE_KERNEL #undef DEFINE_XPU_COMPARE_KERNEL
} // namespace phi } // namespace phi
PD_REGISTER_KERNEL( PD_REGISTER_KERNEL(
equal, XPU, ALL_LAYOUT, phi::EqualKernel, float, int, int64_t) {} less_than, XPU, ALL_LAYOUT, phi::LessThanKernel, int, int64_t, float) {}
PD_REGISTER_KERNEL(
not_equal, XPU, ALL_LAYOUT, phi::NotEqualKernel, float, int, int64_t) {} PD_REGISTER_KERNEL(less_than_raw,
PD_REGISTER_KERNEL(
less_than, XPU, ALL_LAYOUT, phi::LessThanKernel, float, int, int64_t) {}
PD_REGISTER_KERNEL(
less_equal, XPU, ALL_LAYOUT, phi::LessEqualKernel, float, int, int64_t) {}
PD_REGISTER_KERNEL(greater_than,
XPU,
ALL_LAYOUT,
phi::GreaterThanKernel,
float,
int,
int64_t) {}
PD_REGISTER_KERNEL(greater_equal,
XPU, XPU,
ALL_LAYOUT, ALL_LAYOUT,
phi::GreaterEqualKernel, phi::LessThanRawKernel,
float,
int, int,
int64_t) {} int64_t,
float) {}
#define PD_REGISTER_COMPARE_KERNEL(name, func) \
PD_REGISTER_KERNEL( \
name, XPU, ALL_LAYOUT, phi::func##Kernel, int, int64_t, float) {} \
PD_REGISTER_KERNEL(name##_raw, \
XPU, \
ALL_LAYOUT, \
phi::func##RawKernel, \
int, \
int64_t, \
float) {}
PD_REGISTER_COMPARE_KERNEL(less_equal, LessEqual)
PD_REGISTER_COMPARE_KERNEL(greater_than, GreaterThan)
PD_REGISTER_COMPARE_KERNEL(greater_equal, GreaterEqual)
PD_REGISTER_COMPARE_KERNEL(equal, Equal)
PD_REGISTER_COMPARE_KERNEL(not_equal, NotEqual)
...@@ -53,6 +53,19 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(STanh, ...@@ -53,6 +53,19 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(STanh,
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu6, "relu6", "threshold"); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu6, "relu6", "threshold"); // NOLINT
KernelSignature HardSwishOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature(
"hard_swish_raw", {"X"}, {"threshold", "scale", "offset"}, {"Out"});
}
KernelSignature SwishOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("swish_raw", {"X"}, {"beta"}, {"Out"});
}
KernelSignature Relu6OpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("relu6_raw", {"X"}, {"threshold"}, {"Out"});
}
KernelSignature PowOpArgumentMapping(const ArgumentMappingContext& ctx) { KernelSignature PowOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.HasInput("FactorTensor")) { if (ctx.HasInput("FactorTensor")) {
return KernelSignature("pow", {"X"}, {"FactorTensor"}, {"Out"}); return KernelSignature("pow", {"X"}, {"FactorTensor"}, {"Out"});
...@@ -108,10 +121,12 @@ PD_REGISTER_ARG_MAPPING_FN(stanh_grad, phi::STanhGradOpArgumentMapping); ...@@ -108,10 +121,12 @@ PD_REGISTER_ARG_MAPPING_FN(stanh_grad, phi::STanhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(brelu_grad, phi::HardTanhGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(brelu_grad, phi::HardTanhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(relu6_grad, phi::Relu6GradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(relu6_grad, phi::Relu6GradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(relu6, phi::Relu6OpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(hard_swish_grad, PD_REGISTER_ARG_MAPPING_FN(hard_swish_grad,
phi::HardSwishGradOpArgumentMapping); phi::HardSwishGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(hard_swish, phi::HardSwishOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(swish_grad, phi::SwishGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(swish_grad, phi::SwishGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(swish, phi::SwishOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(pow_grad, phi::PowGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(pow_grad, phi::PowGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(pow_double_grad, PD_REGISTER_ARG_MAPPING_FN(pow_double_grad,
phi::PowDoubleGradOpArgumentMapping); phi::PowDoubleGradOpArgumentMapping);
......
...@@ -17,27 +17,27 @@ ...@@ -17,27 +17,27 @@
namespace phi { namespace phi {
KernelSignature LessThanArgumentMapping(const ArgumentMappingContext& ctx) { KernelSignature LessThanArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("less_than", {"X", "Y"}, {"axis"}, {"Out"}); return KernelSignature("less_than_raw", {"X", "Y"}, {"axis"}, {"Out"});
} }
KernelSignature LessEqualArgumentMapping(const ArgumentMappingContext& ctx) { KernelSignature LessEqualArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("less_equal", {"X", "Y"}, {"axis"}, {"Out"}); return KernelSignature("less_equal_raw", {"X", "Y"}, {"axis"}, {"Out"});
} }
KernelSignature GreaterThanArgumentMapping(const ArgumentMappingContext& ctx) { KernelSignature GreaterThanArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("greater_than", {"X", "Y"}, {"axis"}, {"Out"}); return KernelSignature("greater_than_raw", {"X", "Y"}, {"axis"}, {"Out"});
} }
KernelSignature GreaterEqualArgumentMapping(const ArgumentMappingContext& ctx) { KernelSignature GreaterEqualArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("greater_equal", {"X", "Y"}, {"axis"}, {"Out"}); return KernelSignature("greater_equal_raw", {"X", "Y"}, {"axis"}, {"Out"});
} }
KernelSignature EqualArgumentMapping(const ArgumentMappingContext& ctx) { KernelSignature EqualArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("equal", {"X", "Y"}, {"axis"}, {"Out"}); return KernelSignature("equal_raw", {"X", "Y"}, {"axis"}, {"Out"});
} }
KernelSignature NotEqualArgumentMapping(const ArgumentMappingContext& ctx) { KernelSignature NotEqualArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("not_equal", {"X", "Y"}, {"axis"}, {"Out"}); return KernelSignature("not_equal_raw", {"X", "Y"}, {"axis"}, {"Out"});
} }
} // namespace phi } // namespace phi
......
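These mappings keep the legacy static-graph comparison operators, which still carry an axis attribute, pointed at the new *_raw PHI kernels, while the axis-free dygraph path calls the standardized kernels directly. A minimal sketch of that legacy path, assuming the public paddle.static API:

import paddle

paddle.enable_static()
# Building the program emits the old-style `equal` op with its axis attr;
# the argument mapping above routes it to the `equal_raw` kernel at run time.
x = paddle.static.data(name='x', shape=[3], dtype='int64')
y = paddle.static.data(name='y', shape=[3], dtype='int64')
out = paddle.equal(x, y)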
...@@ -181,12 +181,12 @@ KernelSignature ElementwiseMulGradOpArgumentMapping( ...@@ -181,12 +181,12 @@ KernelSignature ElementwiseMulGradOpArgumentMapping(
KernelSignature ElementwiseFMaxOpArgumentMapping( KernelSignature ElementwiseFMaxOpArgumentMapping(
const ArgumentMappingContext& ctx) { const ArgumentMappingContext& ctx) {
return KernelSignature("fmax", {"X", "Y"}, {"axis"}, {"Out"}); return KernelSignature("fmax_raw", {"X", "Y"}, {"axis"}, {"Out"});
} }
KernelSignature ElementwiseFMinOpArgumentMapping( KernelSignature ElementwiseFMinOpArgumentMapping(
const ArgumentMappingContext& ctx) { const ArgumentMappingContext& ctx) {
return KernelSignature("fmin", {"X", "Y"}, {"axis"}, {"Out"}); return KernelSignature("fmin_raw", {"X", "Y"}, {"axis"}, {"Out"});
} }
KernelSignature ElementwiseFMaxGradOpArgumentMapping( KernelSignature ElementwiseFMaxGradOpArgumentMapping(
......
...@@ -2075,7 +2075,7 @@ def greater_than(x, y, cond=None, name=None): ...@@ -2075,7 +2075,7 @@ def greater_than(x, y, cond=None, name=None):
attrs = dict() attrs = dict()
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.greater_than(x, y, -1) return _C_ops.greater_than(x, y)
else: else:
helper.append_op( helper.append_op(
type='greater_than', type='greater_than',
...@@ -2173,8 +2173,7 @@ def equal(x, y, cond=None, name=None): ...@@ -2173,8 +2173,7 @@ def equal(x, y, cond=None, name=None):
out2 = fluid.layers.equal(x=label_cond,y=limit, cond=out_cond) #out2=[False, True] out_cond=[False, True] out2 = fluid.layers.equal(x=label_cond,y=limit, cond=out_cond) #out2=[False, True] out_cond=[False, True]
""" """
if in_dygraph_mode(): if in_dygraph_mode():
default_axis = -1 return _C_ops.equal(x, y)
return _C_ops.equal(x, y, default_axis)
check_variable_and_dtype( check_variable_and_dtype(
x, "x", ["float32", "float64", "int32", "int64"], "equal" x, "x", ["float32", "float64", "int32", "int64"], "equal"
......
if(WITH_CUSTOM_DEVICE AND NOT WITH_GPU) if(WITH_CUSTOM_DEVICE AND NOT WITH_GPU)
set(PLUGIN_URL https://github.com/PaddlePaddle/PaddleCustomDevice.git) set(PLUGIN_URL https://github.com/PaddlePaddle/PaddleCustomDevice.git)
set(PLUGIN_TAG 0698428ddba21e6baecb690579f37c48896f7d56) set(PLUGIN_TAG develop)
file( file(
GLOB TEST_OPS GLOB TEST_OPS
......
...@@ -402,7 +402,7 @@ def hardswish(x, name=None): ...@@ -402,7 +402,7 @@ def hardswish(x, name=None):
if _in_legacy_dygraph(): if _in_legacy_dygraph():
return _legacy_C_ops.hard_swish(x) return _legacy_C_ops.hard_swish(x)
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.hardswish(x, 6, 6, 3) return _C_ops.hardswish(x)
check_variable_and_dtype( check_variable_and_dtype(
x, 'x', ['float16', 'float32', 'float64'], 'hardswish' x, 'x', ['float16', 'float32', 'float64'], 'hardswish'
...@@ -893,7 +893,7 @@ def relu6(x, name=None): ...@@ -893,7 +893,7 @@ def relu6(x, name=None):
""" """
threshold = 6.0 threshold = 6.0
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.relu6(x, threshold) return _C_ops.relu6(x)
if in_dynamic_mode(): if in_dynamic_mode():
return _legacy_C_ops.relu6(x, 'threshold', threshold) return _legacy_C_ops.relu6(x, 'threshold', threshold)
...@@ -1388,7 +1388,7 @@ def swish(x, name=None): ...@@ -1388,7 +1388,7 @@ def swish(x, name=None):
# [-0.23840584, 0. , 0.73105854]) # [-0.23840584, 0. , 0.73105854])
""" """
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.swish(x, 1.0) return _C_ops.swish(x)
if _in_legacy_dygraph(): if _in_legacy_dygraph():
return _legacy_C_ops.swish(x, 'beta', 1.0) return _legacy_C_ops.swish(x, 'beta', 1.0)
......
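The dygraph branches above stop forwarding the activation attributes from Python; the defaults they used to pass (threshold=6, scale=6, offset=3 for hardswish, threshold=6 for relu6, beta=1 for swish) are now owned by the standardized C++ ops. A minimal dygraph sketch, assuming the public paddle.nn.functional API:

import paddle
import paddle.nn.functional as F

x = paddle.to_tensor([-4.0, 1.0, 7.0])
y1 = F.relu6(x)      # min(max(x, 0), 6)
y2 = F.hardswish(x)  # x * relu6(x + 3) / 6
y3 = F.swish(x)      # x * sigmoid(x)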
...@@ -92,7 +92,7 @@ def affine_grid(theta, out_shape, align_corners=True, name=None): ...@@ -92,7 +92,7 @@ def affine_grid(theta, out_shape, align_corners=True, name=None):
if isinstance(out_shape, Variable) if isinstance(out_shape, Variable)
else out_shape else out_shape
) )
return _C_ops.affine_grid(theta, _out_shape, use_cudnn, align_corners) return _C_ops.affine_grid(theta, _out_shape, align_corners, use_cudnn)
elif in_dynamic_mode(): elif in_dynamic_mode():
_out_shape = ( _out_shape = (
out_shape.numpy().tolist() out_shape.numpy().tolist()
......
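Only the positional order of the internal _C_ops.affine_grid call changes (align_corners now precedes use_cudnn); the public signature is untouched. A minimal sketch, assuming the public paddle.nn.functional.affine_grid API:

import paddle
import paddle.nn.functional as F

theta = paddle.rand([1, 2, 3])                       # batch of 2x3 affine matrices
grid = F.affine_grid(theta, [1, 3, 8, 8], align_corners=True)
print(grid.shape)                                    # [1, 8, 8, 2]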
...@@ -140,7 +140,7 @@ def relu6(x, name=None): ...@@ -140,7 +140,7 @@ def relu6(x, name=None):
sparse_x = dense_x.to_sparse_coo(1) sparse_x = dense_x.to_sparse_coo(1)
out = paddle.sparse.nn.functional.relu6(sparse_x) out = paddle.sparse.nn.functional.relu6(sparse_x)
""" """
return _C_ops.sparse_relu6(x, 6.0) return _C_ops.sparse_relu6(x)
@dygraph_only @dygraph_only
......
...@@ -445,8 +445,7 @@ def equal(x, y, name=None): ...@@ -445,8 +445,7 @@ def equal(x, y, name=None):
y = full(shape=[1], dtype=x.dtype, fill_value=y) y = full(shape=[1], dtype=x.dtype, fill_value=y)
if in_dygraph_mode(): if in_dygraph_mode():
default_axis = -1 return _C_ops.equal(x, y)
return _C_ops.equal(x, y, default_axis)
else: else:
if _in_legacy_dygraph(): if _in_legacy_dygraph():
return _legacy_C_ops.equal(x, y) return _legacy_C_ops.equal(x, y)
...@@ -502,8 +501,7 @@ def greater_equal(x, y, name=None): ...@@ -502,8 +501,7 @@ def greater_equal(x, y, name=None):
print(result1) # result1 = [True False True] print(result1) # result1 = [True False True]
""" """
if in_dygraph_mode(): if in_dygraph_mode():
default_axis = -1 return _C_ops.greater_equal(x, y)
return _C_ops.greater_equal(x, y, default_axis)
else: else:
if _in_legacy_dygraph(): if _in_legacy_dygraph():
return _legacy_C_ops.greater_equal(x, y) return _legacy_C_ops.greater_equal(x, y)
...@@ -559,7 +557,7 @@ def greater_than(x, y, name=None): ...@@ -559,7 +557,7 @@ def greater_than(x, y, name=None):
print(result1) # result1 = [False False True] print(result1) # result1 = [False False True]
""" """
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.greater_than(x, y, -1) return _C_ops.greater_than(x, y)
else: else:
if _in_legacy_dygraph(): if _in_legacy_dygraph():
return _legacy_C_ops.greater_than(x, y) return _legacy_C_ops.greater_than(x, y)
...@@ -616,8 +614,7 @@ def less_equal(x, y, name=None): ...@@ -616,8 +614,7 @@ def less_equal(x, y, name=None):
print(result1) # result1 = [True True False] print(result1) # result1 = [True True False]
""" """
if in_dygraph_mode(): if in_dygraph_mode():
axis = -1 return _C_ops.less_equal(x, y)
return _C_ops.less_equal(x, y, axis)
else: else:
if _in_legacy_dygraph(): if _in_legacy_dygraph():
return _legacy_C_ops.less_equal(x, y) return _legacy_C_ops.less_equal(x, y)
...@@ -674,8 +671,7 @@ def less_than(x, y, name=None): ...@@ -674,8 +671,7 @@ def less_than(x, y, name=None):
print(result1) # result1 = [False True False] print(result1) # result1 = [False True False]
""" """
if in_dygraph_mode(): if in_dygraph_mode():
default_axis = -1 return _C_ops.less_than(x, y)
return _C_ops.less_than(x, y, default_axis)
else: else:
if _in_legacy_dygraph(): if _in_legacy_dygraph():
return _legacy_C_ops.less_than(x, y) return _legacy_C_ops.less_than(x, y)
...@@ -732,8 +728,7 @@ def not_equal(x, y, name=None): ...@@ -732,8 +728,7 @@ def not_equal(x, y, name=None):
print(result1) # result1 = [False True True] print(result1) # result1 = [False True True]
""" """
if in_dygraph_mode(): if in_dygraph_mode():
axis = -1 return _C_ops.not_equal(x, y)
return _C_ops.not_equal(x, y, axis)
else: else:
if _in_legacy_dygraph(): if _in_legacy_dygraph():
return _legacy_C_ops.not_equal(x, y) return _legacy_C_ops.not_equal(x, y)
......
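All six comparison wrappers above now call the standardized C++ ops without the -1 axis placeholder; broadcasting behavior is unchanged. A minimal dygraph sketch, assuming the public paddle comparison API:

import paddle

x = paddle.to_tensor([1, 2, 3], dtype='int64')
y = paddle.to_tensor([1, 3, 2], dtype='int64')
print(paddle.equal(x, y))          # [True , False, False]
print(paddle.not_equal(x, y))      # [False, True , True ]
print(paddle.less_than(x, y))      # [False, True , False]
print(paddle.greater_equal(x, y))  # [True , False, True ]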
...@@ -1168,7 +1168,7 @@ def fmax(x, y, name=None): ...@@ -1168,7 +1168,7 @@ def fmax(x, y, name=None):
axis = -1 axis = -1
act = None act = None
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.fmax(x, y, axis) return _C_ops.fmax(x, y)
if _in_legacy_dygraph(): if _in_legacy_dygraph():
return _elementwise_op_in_dygraph( return _elementwise_op_in_dygraph(
x, y, axis=axis, act=act, op_name=op_type x, y, axis=axis, act=act, op_name=op_type
...@@ -1236,7 +1236,7 @@ def fmin(x, y, name=None): ...@@ -1236,7 +1236,7 @@ def fmin(x, y, name=None):
axis = -1 axis = -1
act = None act = None
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.fmin(x, y, axis) return _C_ops.fmin(x, y)
if _in_legacy_dygraph(): if _in_legacy_dygraph():
return _elementwise_op_in_dygraph( return _elementwise_op_in_dygraph(
x, y, axis=axis, act=act, op_name=op_type x, y, axis=axis, act=act, op_name=op_type
......
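fmax and fmin likewise drop the axis argument on the dygraph path. A minimal sketch, assuming the public paddle.fmax/paddle.fmin API (a NaN operand loses to the non-NaN one, as in numpy.fmax/numpy.fmin):

import paddle

x = paddle.to_tensor([1.0, float('nan'), 3.0])
y = paddle.to_tensor([2.0, 5.0, float('nan')])
print(paddle.fmax(x, y))  # [2., 5., 3.]
print(paddle.fmin(x, y))  # [1., 5., 3.]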