Unverified commit 399047d7 authored by YuanRisheng, committed by GitHub

[PHI]Standardise some C++ API (Part2) (#47510)

* standard_api

* add hardtanh
Parent 957fbb02
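The patch below is mostly mechanical renaming of PHI kernels, InferMeta functions, and YAML op entries to their standardized names (for example phi::TrilTriuKernel becomes phi::TrilKernel and phi::WhereIndexInferMeta becomes phi::NonZeroInferMeta). As a quick orientation, the following standalone C++ sketch, which is illustrative only and not part of commit 399047d7, simply tabulates the main old-to-new name mappings that appear in the diff hunks.
// Illustrative sketch only, not part of this commit: lists the kernel/op
// renames visible in the hunks that follow.
#include <iostream>
#include <map>
#include <string>
int main() {
  // old PHI / op name -> standardized name (taken from the diff below)
  const std::map<std::string, std::string> renames = {
      {"hierarchical_sigmoid", "hsigmoid_loss"},
      {"reduce_prod", "prod"},
      {"tril_triu", "tril"},
      {"uniform_random", "uniform"},
      {"uniform_random_inplace", "uniform_inplace"},
      {"uniform_random_raw", "uniform_raw"},
      {"where_index", "nonzero"},
      {"yolov3_loss", "yolo_loss"},
  };
  for (const auto& [old_name, new_name] : renames) {
    std::cout << old_name << " -> " << new_name << "\n";
  }
  return 0;
}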
......@@ -63,8 +63,8 @@ USE_OP_ITSELF(memcpy_d2h);
USE_OP_ITSELF(fetch_v2);
PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(uniform_random_raw, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(uniform_random, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(uniform_raw, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(uniform, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(transpose, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(reshape, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(split, GPU, ALL_LAYOUT);
......
......@@ -218,10 +218,10 @@ class Yolov3LossGradMaker : public framework::SingleGradOpMaker<T> {
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(yolov3_loss,
Yolov3LossInferShapeFunctor,
PD_INFER_META(phi::Yolov3LossInferMeta));
PD_INFER_META(phi::YoloLossInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(yolov3_loss_grad,
Yolov3LossGradInferShapeFunctor,
PD_INFER_META(phi::Yolov3LossGradInferMeta));
PD_INFER_META(phi::YoloLossGradInferMeta));
REGISTER_OPERATOR(yolov3_loss,
ops::Yolov3LossOp,
ops::Yolov3LossOpMaker,
......
......@@ -259,7 +259,7 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(hierarchical_sigmoid,
HierarchicalSigmoidInferShapeFunctor,
PD_INFER_META(phi::HierarchicalSigmoidInferMeta));
PD_INFER_META(phi::HSigmoidLossInferMeta));
REGISTER_OPERATOR(hierarchical_sigmoid,
ops::HierarchicalSigmoidOp,
ops::HierarchicalSigmoidOpMaker<int>,
......
......@@ -93,7 +93,7 @@ namespace ops = paddle::operators;
namespace plat = paddle::platform;
DECLARE_INFER_SHAPE_FUNCTOR(tril_triu,
TrilTriuInferShapeFunctor,
PD_INFER_META(phi::TrilTriuInferMeta));
PD_INFER_META(phi::TrilInferMeta));
REGISTER_OPERATOR(tril_triu,
ops::TrilTriuOp,
ops::TrilTriuOpMaker,
......
......@@ -48,7 +48,7 @@ class WhereIndexOpMaker : public framework::OpProtoAndCheckerMaker {
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(where_index,
WhereIndexInferShapeFunctor,
PD_INFER_META(phi::WhereIndexInferMeta));
PD_INFER_META(phi::NonZeroInferMeta));
REGISTER_OPERATOR(
where_index,
ops::WhereIndexOp,
......
......@@ -791,8 +791,8 @@
func : hard_tanh_grad
inplace : (out_grad -> x_grad)
- backward_op : hierarchical_sigmoid_grad
forward : hierarchical_sigmoid (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, int num_classes, bool remote_prefetch, int trainer_id, int64_t[] height_sections, str[] epmap, str[] table_names, bool is_sparse) -> Tensor(out), Tensor(pre_out), Tensor(w_out)
- backward_op : hsigmoid_loss_grad
forward : hsigmoid_loss (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, int num_classes, bool remote_prefetch, int trainer_id, int64_t[] height_sections, str[] epmap, str[] table_names, bool is_sparse) -> Tensor(out), Tensor(pre_out), Tensor(w_out)
args : (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, Tensor pre_out, Tensor out_grad, int num_classes, bool remote_prefetch, int trainer_id, int64_t[] height_sections, str[] epmap, str[] table_names, bool is_sparse)
output : Tensor(x_grad), Tensor(w_grad), Tensor(bias_grad)
infer_meta :
......@@ -800,7 +800,7 @@
param : [x ,w, bias]
optional: path, code, bias
kernel :
func : hierarchical_sigmoid_grad
func : hsigmoid_loss_grad
- backward_op : huber_loss_grad
forward : huber_loss (Tensor input, Tensor label, float delta) -> Tensor(out), Tensor(residual)
......@@ -1477,6 +1477,16 @@
kernel :
func : prelu_grad
- backward_op : prod_grad
forward : prod (Tensor x, IntArray dims, bool keep_dim, bool reduce_all) -> Tensor(out)
args : (Tensor x, Tensor out, Tensor out_grad, IntArray dims, bool keep_dim, bool reduce_all)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : prod_grad
- backward_op : psroi_pool_grad
forward : psroi_pool (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, int output_channels, float spatial_scale) -> Tensor(out)
args : (Tensor x, Tensor boxes, Tensor boxes_num, Tensor out_grad, int pooled_height, int pooled_width, int output_channels, float spatial_scale)
......@@ -1516,16 +1526,6 @@
output : Tensor(x_grad)
invoke : real_grad_impl(out_grad, x_grad)
- backward_op : reduce_prod_grad
forward : reduce_prod (Tensor x, IntArray dims, bool keep_dim, bool reduce_all) -> Tensor(out)
args : (Tensor x, Tensor out, Tensor out_grad, IntArray dims, bool keep_dim, bool reduce_all)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : prod_grad
- backward_op : relu6_grad
forward : relu6 (Tensor x, float threshold) -> Tensor(out)
args : (Tensor out, Tensor out_grad, float threshold)
......@@ -2234,15 +2234,15 @@
kernel :
func : triangular_solve_grad
- backward_op : tril_triu_grad
forward : tril_triu(Tensor x, int diagonal, bool lower) -> Tensor(out)
- backward_op : tril_grad
forward : tril(Tensor x, int diagonal, bool lower) -> Tensor(out)
args : (Tensor out_grad, int diagonal, bool lower)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out_grad]
kernel :
func : tril_triu_grad
func : tril_grad
- backward_op : trilinear_interp_grad
forward : trilinear_interp (Tensor x, Tensor out_size, Tensor[] size_tensor, Tensor scale_tensor, str data_layout, int out_d, int out_h, int out_w, float[] scale, str interp_method, bool align_corners, int align_mode) -> Tensor(output)
......@@ -2273,14 +2273,14 @@
func : unfold_grad
no_need_buffer : x
- backward_op : uniform_random_inplace_grad
forward : uniform_random_inplace(Tensor x, float min, float max, int seed, int diag_num, int diag_step, float diag_val) -> Tensor(out)
- backward_op : uniform_inplace_grad
forward : uniform_inplace(Tensor x, float min, float max, int seed, int diag_num, int diag_step, float diag_val) -> Tensor(out)
args : (Tensor out_grad, float min, float max, int seed, int diag_num, int diag_step, float diag_val)
output : Tensor(x_grad)
infer_meta :
func : UniformRandomInplaceGradInferMeta
kernel :
func : uniform_random_inplace_grad
func : uniform_inplace_grad
inplace : (out_grad -> x_grad)
- backward_op : unsqueeze_double_grad
......@@ -2335,14 +2335,14 @@
func : where_grad
no_need_buffer : x, y
- backward_op : yolov3_loss_grad
forward : yolov3_loss(Tensor x, Tensor gt_box, Tensor gt_label, Tensor gt_score, int[] anchors, int[] anchor_mask, int class_num, float ignore_thresh, int downsample_ratio, bool use_label_smooth=true, float scale_x_y=1.0) -> Tensor(loss), Tensor(objectness_mask), Tensor(gt_match_mask)
- backward_op : yolo_loss_grad
forward : yolo_loss(Tensor x, Tensor gt_box, Tensor gt_label, Tensor gt_score, int[] anchors, int[] anchor_mask, int class_num, float ignore_thresh, int downsample_ratio, bool use_label_smooth=true, float scale_x_y=1.0) -> Tensor(loss), Tensor(objectness_mask), Tensor(gt_match_mask)
args : (Tensor x, Tensor gt_box, Tensor gt_label, Tensor gt_score, Tensor objectness_mask, Tensor gt_match_mask, Tensor loss_grad, int[] anchors, int[] anchor_mask, int class_num, float ignore_thresh, int downsample_ratio, bool use_label_smooth=true, float scale_x_y=1.0)
output : Tensor(x_grad), Tensor(gt_box_grad), Tensor(gt_label_grad), Tensor(gt_score_grad)
infer_meta :
func : Yolov3LossGradInferMeta
func : YoloLossGradInferMeta
kernel :
func : yolov3_loss_grad
func : yolo_loss_grad
optional : gt_score
- backward_op: fold_grad
......
......@@ -1036,17 +1036,6 @@
func : hard_tanh
backward : hardtanh_grad
- op : hierarchical_sigmoid
args : (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, int num_classes, bool remote_prefetch, int trainer_id, int64_t[] height_sections, str[] epmap, str[] table_names, bool is_sparse)
output : Tensor(out), Tensor(pre_out), Tensor(w_out)
infer_meta :
func : HierarchicalSigmoidInferMeta
optional: path, code, bias
kernel :
func : hierarchical_sigmoid
data_type : x
backward : hierarchical_sigmoid_grad
- op : histogram
args : (Tensor input, int64_t bins, int min, int max)
output : Tensor(out)
......@@ -1055,6 +1044,17 @@
kernel :
func : histogram
- op : hsigmoid_loss
args : (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, int num_classes, bool remote_prefetch, int trainer_id, int64_t[] height_sections, str[] epmap, str[] table_names, bool is_sparse)
output : Tensor(out), Tensor(pre_out), Tensor(w_out)
infer_meta :
func : HSigmoidLossInferMeta
optional: path, code, bias
kernel :
func : hsigmoid_loss
data_type : x
backward : hsigmoid_loss_grad
- op : huber_loss
args : (Tensor input, Tensor label, float delta)
output : Tensor(out), Tensor(residual)
......@@ -1696,6 +1696,14 @@
func : nms
data_type : x
- op : nonzero
args : (Tensor condition)
output : Tensor(out)
infer_meta :
func : NonZeroInferMeta
kernel :
func : nonzero
- op : norm
args : (Tensor x, int axis, float epsilon, bool is_test)
output : Tensor(out), Tensor(norm)
......@@ -1828,6 +1836,15 @@
kernel :
func : prior_box
- op : prod
args : (Tensor x, IntArray dims, bool keep_dim, bool reduce_all)
output : Tensor
infer_meta :
func : ReduceIntArrayAxisInferMetaBase
kernel :
func : prod_raw
backward : prod_grad
- op : psroi_pool
args : (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, int output_channels, float spatial_scale)
output : Tensor
......@@ -1893,15 +1910,6 @@
func : real
backward : real_grad
- op : reduce_prod
args : (Tensor x, IntArray dims, bool keep_dim, bool reduce_all)
output : Tensor
infer_meta :
func : ReduceIntArrayAxisInferMetaBase
kernel :
func : prod_raw
backward : reduce_prod_grad
- op : relu
args : (Tensor x)
output : Tensor(out)
......@@ -2460,6 +2468,15 @@
func : triangular_solve
backward : triangular_solve_grad
- op : tril
args : (Tensor x, int diagonal, bool lower)
output : Tensor(out)
infer_meta :
func : TrilInferMeta
kernel :
func : tril
backward : tril_grad
- op : tril_indices
args : (int rows, int cols, int offset, DataType dtype, Place place={})
output : Tensor(out)
......@@ -2472,15 +2489,6 @@
data_type : dtype
backend : place
- op : tril_triu
args : (Tensor x, int diagonal, bool lower)
output : Tensor(out)
infer_meta :
func : TrilTriuInferMeta
kernel :
func : tril_triu
backward : tril_triu_grad
- op : trilinear_interp
args : (Tensor x, Tensor out_size, Tensor[] size_tensor, Tensor scale_tensor, str data_layout, int out_d, int out_h, int out_w, float[] scale, str interp_method, bool align_corners, int align_mode)
output : Tensor(output)
......@@ -2535,14 +2543,14 @@
func : unfold
backward : unfold_grad
- op : uniform_random
- op : uniform
args : (IntArray shape, DataType dtype, Scalar min, Scalar max, int seed, Place place={})
output : Tensor(out)
infer_meta :
func : UniformRandomInferMeta
param: [shape, dtype]
kernel :
func : uniform_random
func : uniform
param: [shape, dtype, min, max, seed]
data_type : dtype
backend : place
......@@ -2628,14 +2636,6 @@
func : where
backward : where_grad
- op : where_index
args : (Tensor condition)
output : Tensor(out)
infer_meta :
func : WhereIndexInferMeta
kernel :
func : where_index
- op : yolo_box
args : (Tensor x, Tensor img_size, int[] anchors, int class_num, float conf_thresh, int downsample_ratio, bool clip_bbox, float scale_x_y=1.0, bool iou_aware=false, float iou_aware_factor=0.5)
output : Tensor(boxes), Tensor(scores)
......@@ -2645,16 +2645,16 @@
func : yolo_box
data_type : x
- op : yolov3_loss
- op : yolo_loss
args : (Tensor x, Tensor gt_box, Tensor gt_label, Tensor gt_score, int[] anchors, int[] anchor_mask, int class_num, float ignore_thresh, int downsample_ratio, bool use_label_smooth=true, float scale_x_y=1.0)
output : Tensor(loss), Tensor(objectness_mask), Tensor(gt_match_mask)
infer_meta :
func : Yolov3LossInferMeta
func : YoloLossInferMeta
kernel :
func : yolov3_loss
func : yolo_loss
data_type : x
optional : gt_score
backward : yolov3_loss_grad
backward : yolo_loss_grad
- op : zeros
args : (IntArray shape, DataType dtype=DataType::FLOAT32, Place place=CPUPlace())
......@@ -2734,16 +2734,16 @@
intermediate : reserve
view : (dropout_state_in -> dropout_state_out)
- op: uniform_random_inplace
- op: uniform_inplace
args: (Tensor x, float min, float max, int seed, int diag_num, int diag_step, float diag_val)
output: Tensor(out)
infer_meta:
func: UniformRandomInplaceInferMeta
kernel:
func: uniform_random_inplace
func: uniform_inplace
data_type: x
inplace: (x -> out)
backward: uniform_random_inplace_grad
backward: uniform_inplace_grad
- op: unpool
args: (Tensor x, Tensor indices, int[] ksize, int[] strides, int[] padding, IntArray output_size, str data_format)
......
......@@ -987,24 +987,24 @@ void UnStackGradInferMeta(const std::vector<const MetaTensor*>& out_grad,
x_grad->set_dtype(out_grad[0]->dtype());
}
void Yolov3LossGradInferMeta(const MetaTensor& x,
const MetaTensor& gt_box,
const MetaTensor& gt_label,
const MetaTensor& gt_score,
const MetaTensor& objectness_mask,
const MetaTensor& gt_match_mask,
const MetaTensor& loss_grad,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
MetaTensor* x_grad,
MetaTensor* gt_box_grad,
MetaTensor* gt_label_grad,
MetaTensor* gt_score_grad) {
void YoloLossGradInferMeta(const MetaTensor& x,
const MetaTensor& gt_box,
const MetaTensor& gt_label,
const MetaTensor& gt_score,
const MetaTensor& objectness_mask,
const MetaTensor& gt_match_mask,
const MetaTensor& loss_grad,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
MetaTensor* x_grad,
MetaTensor* gt_box_grad,
MetaTensor* gt_label_grad,
MetaTensor* gt_score_grad) {
if (x_grad) {
x_grad->set_dims(x.dims());
x_grad->set_dtype(x.dtype());
......
......@@ -385,24 +385,24 @@ void UnStackGradInferMeta(const std::vector<const MetaTensor*>& out_grad,
int axis,
MetaTensor* x_grad);
void Yolov3LossGradInferMeta(const MetaTensor& x,
const MetaTensor& gt_box,
const MetaTensor& gt_label,
const MetaTensor& gt_score,
const MetaTensor& objectness_mask,
const MetaTensor& gt_match_mask,
const MetaTensor& loss_grad,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
MetaTensor* x_grad,
MetaTensor* gt_box_grad,
MetaTensor* gt_label_grad,
MetaTensor* gt_score_grad);
void YoloLossGradInferMeta(const MetaTensor& x,
const MetaTensor& gt_box,
const MetaTensor& gt_label,
const MetaTensor& gt_score,
const MetaTensor& objectness_mask,
const MetaTensor& gt_match_mask,
const MetaTensor& loss_grad,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
MetaTensor* x_grad,
MetaTensor* gt_box_grad,
MetaTensor* gt_label_grad,
MetaTensor* gt_score_grad);
void IndexAddGradInferMeta(const MetaTensor& index,
const MetaTensor& add_value,
......
......@@ -1328,22 +1328,22 @@ void GraphSampleNeighborsInferMeta(const MetaTensor& row,
out_count->set_dtype(DataType::INT32);
}
void HierarchicalSigmoidInferMeta(const MetaTensor& x,
const MetaTensor& w,
const MetaTensor& label,
const MetaTensor& path,
const MetaTensor& code,
const MetaTensor& bias,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
MetaTensor* out,
MetaTensor* pre_out,
MetaTensor* w_out) {
void HSigmoidLossInferMeta(const MetaTensor& x,
const MetaTensor& w,
const MetaTensor& label,
const MetaTensor& path,
const MetaTensor& code,
const MetaTensor& bias,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
MetaTensor* out,
MetaTensor* pre_out,
MetaTensor* w_out) {
const int64_t input_dims = x.dims()[0];
const int64_t label_dims = label.dims()[0];
PADDLE_ENFORCE_EQ(input_dims,
......@@ -2762,20 +2762,20 @@ void WhereInferMeta(const MetaTensor& condition,
out->share_meta(x);
}
void Yolov3LossInferMeta(const MetaTensor& x,
const MetaTensor& gt_box,
const MetaTensor& gt_label,
const MetaTensor& gt_score,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
MetaTensor* loss,
MetaTensor* objectness_mask,
MetaTensor* gt_match_mask) {
void YoloLossInferMeta(const MetaTensor& x,
const MetaTensor& gt_box,
const MetaTensor& gt_label,
const MetaTensor& gt_score,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
MetaTensor* loss,
MetaTensor* objectness_mask,
MetaTensor* gt_match_mask) {
auto dim_x = x.dims();
auto dim_gtbox = gt_box.dims();
auto dim_gtlabel = gt_label.dims();
......
......@@ -288,22 +288,22 @@ void GraphSampleNeighborsInferMeta(const MetaTensor& row,
MetaTensor* out_count,
MetaTensor* out_eids);
void HierarchicalSigmoidInferMeta(const MetaTensor& x,
const MetaTensor& w,
const MetaTensor& label,
const MetaTensor& path,
const MetaTensor& code,
const MetaTensor& bias,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
MetaTensor* out,
MetaTensor* pre_out,
MetaTensor* w_out);
void HSigmoidLossInferMeta(const MetaTensor& x,
const MetaTensor& w,
const MetaTensor& label,
const MetaTensor& path,
const MetaTensor& code,
const MetaTensor& bias,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
MetaTensor* out,
MetaTensor* pre_out,
MetaTensor* w_out);
void InterpolateInferMeta(
const MetaTensor& x,
......@@ -508,19 +508,19 @@ void WhereInferMeta(const MetaTensor& condition,
const MetaTensor& y,
MetaTensor* out);
void Yolov3LossInferMeta(const MetaTensor& x,
const MetaTensor& gt_box,
const MetaTensor& gt_label,
const MetaTensor& gt_score,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
MetaTensor* loss,
MetaTensor* objectness_mask,
MetaTensor* gt_match_mask);
void YoloLossInferMeta(const MetaTensor& x,
const MetaTensor& gt_box,
const MetaTensor& gt_label,
const MetaTensor& gt_score,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
MetaTensor* loss,
MetaTensor* objectness_mask,
MetaTensor* gt_match_mask);
} // namespace phi
......@@ -402,64 +402,6 @@ void InstanceNormInferMeta(const MetaTensor& x,
}
}
void SendURecvInferMeta(const MetaTensor& x,
const MetaTensor& src_index,
const MetaTensor& dst_index,
const std::string& reduce_op,
const IntArray& out_size,
MetaTensor* out,
MetaTensor* dst_count) {
auto src_index_dims = src_index.dims();
if (src_index_dims.size() == 2) {
PADDLE_ENFORCE_EQ(src_index_dims[1],
1,
phi::errors::InvalidArgument(
"The last dim of Src_index should be 1 when it "
"is 2D, but we get %d",
src_index_dims[1]));
} else {
PADDLE_ENFORCE_EQ(
src_index_dims.size(),
1,
phi::errors::InvalidArgument(
"The Src_index should be 1D, when it is not 2D, but we get %d",
src_index_dims.size()));
}
auto dst_index_dims = dst_index.dims();
if (dst_index_dims.size() == 2) {
PADDLE_ENFORCE_EQ(dst_index_dims[1],
1,
phi::errors::InvalidArgument(
"The last dim of Dst_index should be 1 when it "
"is 2D, but we get %d",
dst_index_dims[1]));
} else {
PADDLE_ENFORCE_EQ(
dst_index_dims.size(),
1,
phi::errors::InvalidArgument("The Dst_index should be 1D, "
"when it is not 2D, but we get %d",
dst_index_dims.size()));
}
PADDLE_ENFORCE_EQ(src_index_dims[0],
dst_index_dims[0],
phi::errors::InvalidArgument(
"Src_index and Dst_index should have the same shape."));
auto dims = x.dims();
std::vector<int64_t> dims_ = phi::vectorize(dims);
dims_[0] = -1;
out->set_dims(phi::make_ddim(dims_));
out->set_dtype(x.dtype());
if (reduce_op == "MEAN") {
dst_count->set_dims({-1});
dst_count->set_dtype(DataType::INT32);
}
}
void GroupNormInferMeta(const MetaTensor& x,
const MetaTensor& scale,
const MetaTensor& bias,
......@@ -1164,6 +1106,64 @@ void ScatterNdAddInferMeta(const MetaTensor& x,
out->set_dtype(x.dtype());
}
void SendURecvInferMeta(const MetaTensor& x,
const MetaTensor& src_index,
const MetaTensor& dst_index,
const std::string& reduce_op,
const IntArray& out_size,
MetaTensor* out,
MetaTensor* dst_count) {
auto src_index_dims = src_index.dims();
if (src_index_dims.size() == 2) {
PADDLE_ENFORCE_EQ(src_index_dims[1],
1,
phi::errors::InvalidArgument(
"The last dim of Src_index should be 1 when it "
"is 2D, but we get %d",
src_index_dims[1]));
} else {
PADDLE_ENFORCE_EQ(
src_index_dims.size(),
1,
phi::errors::InvalidArgument(
"The Src_index should be 1D, when it is not 2D, but we get %d",
src_index_dims.size()));
}
auto dst_index_dims = dst_index.dims();
if (dst_index_dims.size() == 2) {
PADDLE_ENFORCE_EQ(dst_index_dims[1],
1,
phi::errors::InvalidArgument(
"The last dim of Dst_index should be 1 when it "
"is 2D, but we get %d",
dst_index_dims[1]));
} else {
PADDLE_ENFORCE_EQ(
dst_index_dims.size(),
1,
phi::errors::InvalidArgument("The Dst_index should be 1D, "
"when it is not 2D, but we get %d",
dst_index_dims.size()));
}
PADDLE_ENFORCE_EQ(src_index_dims[0],
dst_index_dims[0],
phi::errors::InvalidArgument(
"Src_index and Dst_index should have the same shape."));
auto dims = x.dims();
std::vector<int64_t> dims_ = phi::vectorize(dims);
dims_[0] = -1;
out->set_dims(phi::make_ddim(dims_));
out->set_dtype(x.dtype());
if (reduce_op == "MEAN") {
dst_count->set_dims({-1});
dst_count->set_dtype(DataType::INT32);
}
}
void SpectralNormInferMeta(const MetaTensor& weight,
const MetaTensor& u,
const MetaTensor& v,
......
......@@ -72,14 +72,6 @@ void InstanceNormInferMeta(const MetaTensor& x,
MetaTensor* saved_variance,
MetaConfig config = MetaConfig());
void SendURecvInferMeta(const MetaTensor& x,
const MetaTensor& src_index,
const MetaTensor& dst_index,
const std::string& reduce_op,
const IntArray& out_size,
MetaTensor* out,
MetaTensor* dst_count);
void GroupNormInferMeta(const MetaTensor& x,
const MetaTensor& scale,
const MetaTensor& bias,
......@@ -186,6 +178,14 @@ void ScatterNdAddInferMeta(const MetaTensor& x,
const MetaTensor& updates,
MetaTensor* out);
void SendURecvInferMeta(const MetaTensor& x,
const MetaTensor& src_index,
const MetaTensor& dst_index,
const std::string& reduce_op,
const IntArray& out_size,
MetaTensor* out,
MetaTensor* dst_count);
void SpectralNormInferMeta(const MetaTensor& weight,
const MetaTensor& u,
const MetaTensor& v,
......
This diff has been collapsed.
......@@ -65,6 +65,11 @@ void BatchSizeLikeInferMeta(const MetaTensor& x,
void CastInferMeta(const MetaTensor& x, DataType out_dtype, MetaTensor* out);
void ChannelShuffleInferMeta(const MetaTensor& x,
int groups,
const std::string& data_format,
MetaTensor* out);
void CholeskyInferMeta(const MetaTensor& x, bool upper, MetaTensor* out);
void ClassCenterSampleInferMeta(const MetaTensor& label,
......@@ -191,6 +196,14 @@ void FlipInferMeta(const MetaTensor& x,
const std::vector<int>& axis,
MetaTensor* out);
void FoldInferMeta(const MetaTensor& x,
const std::vector<int>& output_sizes,
const std::vector<int>& kernel_sizes,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& dilations,
MetaTensor* out);
void FrameInferMeta(const MetaTensor& x,
int frame_length,
int hop_length,
......@@ -214,6 +227,8 @@ void GumbelSoftmaxInferMeta(const MetaTensor& x,
void HistogramInferMeta(
const MetaTensor& input, int64_t bins, int min, int max, MetaTensor* out);
void IdentityLossInferMeta(const MetaTensor& x, int reduction, MetaTensor* out);
void IncrementInferMeta(const MetaTensor& x, float value, MetaTensor* out);
void InferMetaFromVecValue(const MetaTensor& x,
......@@ -288,6 +303,8 @@ void NanmedianInferMeta(const MetaTensor& x,
MetaTensor* out,
MetaTensor* median_index);
void NonZeroInferMeta(const MetaTensor& condition, MetaTensor* out);
void NMSInferMeta(const MetaTensor& x, float threshold, MetaTensor* out);
void NormInferMeta(const MetaTensor& x,
......@@ -297,6 +314,14 @@ void NormInferMeta(const MetaTensor& x,
MetaTensor* out,
MetaTensor* norm);
void OneHotRawInferMeta(const MetaTensor& x,
const Scalar& depth,
DataType dtype,
bool allow_out_of_range,
MetaTensor* out);
void OneHotInferMeta(const MetaTensor& x, const Scalar& depth, MetaTensor* out);
void OverlapAddInferMeta(const MetaTensor& x,
int hop_length,
int axis,
......@@ -576,10 +601,10 @@ void TransposeGradInferMeta(const MetaTensor& x,
const std::vector<int>& axis,
MetaTensor* out);
void TrilTriuInferMeta(const MetaTensor& x,
int diagonal,
bool lower,
MetaTensor* out);
void TrilInferMeta(const MetaTensor& x,
int diagonal,
bool lower,
MetaTensor* out);
void UnbindInferMeta(const MetaTensor& x,
int axis,
......@@ -657,29 +682,4 @@ void UnStackInferMeta(const MetaTensor& x,
int num,
std::vector<MetaTensor*> outs);
void OneHotRawInferMeta(const MetaTensor& x,
const Scalar& depth,
DataType dtype,
bool allow_out_of_range,
MetaTensor* out);
void OneHotInferMeta(const MetaTensor& x, const Scalar& depth, MetaTensor* out);
void WhereIndexInferMeta(const MetaTensor& condition, MetaTensor* out);
void ChannelShuffleInferMeta(const MetaTensor& x,
int groups,
const std::string& data_format,
MetaTensor* out);
void IdentityLossInferMeta(const MetaTensor& x, int reduction, MetaTensor* out);
void FoldInferMeta(const MetaTensor& x,
const std::vector<int>& output_sizes,
const std::vector<int>& kernel_sizes,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& dilations,
MetaTensor* out);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/hierarchical_sigmoid_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/hierarchical_sigmoid_grad.h"
namespace phi {
template <typename T, typename Context>
void HierarchicalSigmoidGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
const DenseTensor& pre_out,
const DenseTensor& out_grad,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* x_grad,
DenseTensor* w_grad,
DenseTensor* bias_grad) {
HierarchicalSigmoidGradKernelImpl<T>(ctx,
x,
w,
label,
path,
code,
bias,
pre_out,
out_grad,
num_classes,
remote_prefetch,
trainer_id,
height_sections,
epmap,
table_names,
is_sparse,
x_grad,
w_grad,
bias_grad);
}
} // namespace phi
PD_REGISTER_KERNEL(hierarchical_sigmoid_grad,
CPU,
ALL_LAYOUT,
phi::HierarchicalSigmoidGradKernel,
float,
double) {}
......@@ -26,27 +26,26 @@ namespace phi {
namespace math = paddle::operators::math;
template <typename T, typename Context>
void HierarchicalSigmoidGradKernelImpl(
const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
const DenseTensor& pre_out,
const DenseTensor& out_grad,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* x_grad,
DenseTensor* w_grad,
DenseTensor* bias_grad,
SelectedRows* w_grad_sr = nullptr) {
void HSigmoidLossGradKernelImpl(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
const DenseTensor& pre_out,
const DenseTensor& out_grad,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* x_grad,
DenseTensor* w_grad,
DenseTensor* bias_grad,
SelectedRows* w_grad_sr = nullptr) {
funcs::SetConstant<Context, T> zero;
DenseTensor pre_out_grad;
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/hsigmoid_loss_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/hsigmoid_loss_grad.h"
namespace phi {
template <typename T, typename Context>
void HSigmoidLossGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
const DenseTensor& pre_out,
const DenseTensor& out_grad,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* x_grad,
DenseTensor* w_grad,
DenseTensor* bias_grad) {
HSigmoidLossGradKernelImpl<T>(ctx,
x,
w,
label,
path,
code,
bias,
pre_out,
out_grad,
num_classes,
remote_prefetch,
trainer_id,
height_sections,
epmap,
table_names,
is_sparse,
x_grad,
w_grad,
bias_grad);
}
} // namespace phi
PD_REGISTER_KERNEL(hsigmoid_loss_grad,
CPU,
ALL_LAYOUT,
phi::HSigmoidLossGradKernel,
float,
double) {}
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/hierarchical_sigmoid_kernel.h"
#include "paddle/phi/kernels/hsigmoid_loss_kernel.h"
#include "paddle/fluid/operators/math/matrix_bit_code.h"
#include "paddle/fluid/platform/transform.h"
......@@ -28,23 +28,23 @@ namespace phi {
namespace math = paddle::operators::math;
template <typename T, typename Context>
void HierarchicalSigmoidKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* out,
DenseTensor* pre_out,
DenseTensor* w_out) {
void HSigmoidLossKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* out,
DenseTensor* pre_out,
DenseTensor* w_out) {
size_t num_classes_st = static_cast<size_t>(num_classes);
// for remote prefetch
......@@ -106,9 +106,5 @@ void HierarchicalSigmoidKernel(const Context& ctx,
} // namespace phi
PD_REGISTER_KERNEL(hierarchical_sigmoid,
CPU,
ALL_LAYOUT,
phi::HierarchicalSigmoidKernel,
float,
double) {}
PD_REGISTER_KERNEL(
hsigmoid_loss, CPU, ALL_LAYOUT, phi::HSigmoidLossKernel, float, double) {}
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/where_index_kernel.h"
#include "paddle/phi/kernels/nonzero_kernel.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
......@@ -47,9 +47,9 @@ struct WhereIndexFunctor {
};
template <typename T, typename Context>
void WhereIndexKernel(const Context& dev_ctx,
const DenseTensor& condition,
DenseTensor* out) {
void NonZeroKernel(const Context& dev_ctx,
const DenseTensor& condition,
DenseTensor* out) {
const T* cond_data = condition.data<T>();
auto numel = condition.numel();
auto dims = condition.dims();
......@@ -83,10 +83,10 @@ void WhereIndexKernel(const Context& dev_ctx,
} // namespace phi
PD_REGISTER_KERNEL(where_index,
PD_REGISTER_KERNEL(nonzero,
CPU,
ALL_LAYOUT,
phi::WhereIndexKernel,
phi::NonZeroKernel,
int64_t,
int,
int16_t,
......
......@@ -12,16 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_prod_grad_kernel.h"
#include "paddle/phi/kernels/prod_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h"
#include "paddle/phi/kernels/impl/prod_grad_kernel_impl.h"
PD_REGISTER_KERNEL(prod_grad,
CPU,
ALL_LAYOUT,
phi::ReduceProdGradKernel,
phi::ProdGradKernel,
float,
double,
int,
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_prod_kernel.h"
#include "paddle/phi/kernels/prod_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
......
......@@ -14,12 +14,12 @@
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/tril_triu_kernel_impl.h"
#include "paddle/phi/kernels/impl/tril_grad_kernel_impl.h"
PD_REGISTER_KERNEL(tril_triu,
PD_REGISTER_KERNEL(tril_grad,
CPU,
ALL_LAYOUT,
phi::TrilTriuKernel,
phi::TrilGradKernel,
bool,
float,
double,
......
......@@ -14,12 +14,12 @@
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/tril_triu_grad_kernel_impl.h"
#include "paddle/phi/kernels/impl/tril_kernel_impl.h"
PD_REGISTER_KERNEL(tril_triu_grad,
PD_REGISTER_KERNEL(tril,
CPU,
ALL_LAYOUT,
phi::TrilTriuGradKernel,
phi::TrilKernel,
bool,
float,
double,
......
......@@ -12,22 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/uniform_random_inplace_grad_kernel.h"
#include "paddle/phi/kernels/uniform_inplace_grad_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void UniformRandomInplaceGradKernel(const Context& ctx,
const DenseTensor& out_grad,
float min,
float max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* x_grad) {
void UniformInplaceGradKernel(const Context& ctx,
const DenseTensor& out_grad,
float min,
float max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* x_grad) {
if (x_grad) {
auto* data = ctx.template Alloc<T>(x_grad);
std::fill(data, data + x_grad->numel(), T(0));
......@@ -36,9 +36,9 @@ void UniformRandomInplaceGradKernel(const Context& ctx,
} // namespace phi
PD_REGISTER_KERNEL(uniform_random_inplace_grad,
PD_REGISTER_KERNEL(uniform_inplace_grad,
CPU,
ALL_LAYOUT,
phi::UniformRandomInplaceGradKernel,
phi::UniformInplaceGradKernel,
float,
double) {}
......@@ -12,22 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/uniform_random_inplace_kernel.h"
#include "paddle/phi/kernels/uniform_inplace_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void UniformRandomInplaceKernel(const Context& ctx,
const DenseTensor& x,
float min,
float max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* out) {
void UniformInplaceKernel(const Context& ctx,
const DenseTensor& x,
float min,
float max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* out) {
T* data = ctx.template Alloc<T>(out);
int64_t size = out->numel();
std::uniform_real_distribution<T> dist(static_cast<T>(min),
......@@ -46,9 +46,9 @@ void UniformRandomInplaceKernel(const Context& ctx,
} // namespace phi
PD_REGISTER_KERNEL(uniform_random_inplace,
PD_REGISTER_KERNEL(uniform_inplace,
CPU,
ALL_LAYOUT,
phi::UniformRandomInplaceKernel,
phi::UniformInplaceKernel,
float,
double) {}
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/uniform_random_kernel.h"
#include "paddle/phi/kernels/uniform_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/uniform_real_distribution.h"
......@@ -20,16 +20,16 @@
namespace phi {
template <typename T, typename Context>
void UniformRandomRawKernel(const Context &dev_ctx,
const IntArray &shape,
DataType dtype,
const Scalar &min,
const Scalar &max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor *out) {
void UniformRawKernel(const Context &dev_ctx,
const IntArray &shape,
DataType dtype,
const Scalar &min,
const Scalar &max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor *out) {
out->Resize(phi::make_ddim(shape.GetData()));
T *data = dev_ctx.template Alloc<T>(out);
auto size = out->numel();
......@@ -63,10 +63,10 @@ void UniformRandomRawKernel(const Context &dev_ctx,
} // namespace phi
PD_REGISTER_KERNEL(uniform_random_raw,
PD_REGISTER_KERNEL(uniform_raw,
CPU,
ALL_LAYOUT,
phi::UniformRandomRawKernel,
phi::UniformRawKernel,
float,
double,
phi::dtype::bfloat16) {}
......@@ -12,14 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/yolov3_loss_grad_kernel.h"
#include "paddle/phi/kernels/yolo_loss_grad_kernel.h"
#include <algorithm>
#include <vector>
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/yolov3_loss_functor.h"
#include "paddle/phi/kernels/cpu/yolo_loss_functor.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace phi {
......@@ -117,25 +117,25 @@ static inline void CalcObjnessLossGrad(T* input_grad,
}
template <typename T, typename Context>
void Yolov3LossGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& gt_box,
const DenseTensor& gt_label,
const paddle::optional<DenseTensor>& gt_score,
const DenseTensor& objectness_mask,
const DenseTensor& gt_match_mask,
const DenseTensor& loss_grad,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
DenseTensor* x_grad,
DenseTensor* gt_box_grad,
DenseTensor* gt_label_grad,
DenseTensor* gt_score_grad) {
void YoloLossGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& gt_box,
const DenseTensor& gt_label,
const paddle::optional<DenseTensor>& gt_score,
const DenseTensor& objectness_mask,
const DenseTensor& gt_match_mask,
const DenseTensor& loss_grad,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
DenseTensor* x_grad,
DenseTensor* gt_box_grad,
DenseTensor* gt_label_grad,
DenseTensor* gt_score_grad) {
auto* input = &x;
auto input_grad = x_grad;
auto* objness_mask = &objectness_mask;
......@@ -237,9 +237,5 @@ void Yolov3LossGradKernel(const Context& dev_ctx,
} // namespace phi
PD_REGISTER_KERNEL(yolov3_loss_grad,
CPU,
ALL_LAYOUT,
phi::Yolov3LossGradKernel,
float,
double) {}
PD_REGISTER_KERNEL(
yolo_loss_grad, CPU, ALL_LAYOUT, phi::YoloLossGradKernel, float, double) {}
......@@ -12,14 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/yolov3_loss_kernel.h"
#include "paddle/phi/kernels/yolo_loss_kernel.h"
#include <algorithm>
#include <vector>
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/yolov3_loss_functor.h"
#include "paddle/phi/kernels/cpu/yolo_loss_functor.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace phi {
......@@ -178,21 +178,21 @@ static void inline GtValid(bool* valid,
}
template <typename T, typename Context>
void Yolov3LossKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& gt_box,
const DenseTensor& gt_label,
const paddle::optional<DenseTensor>& gt_score,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
DenseTensor* loss,
DenseTensor* objectness_mask,
DenseTensor* gt_match_mask) {
void YoloLossKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& gt_box,
const DenseTensor& gt_label,
const paddle::optional<DenseTensor>& gt_score,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
DenseTensor* loss,
DenseTensor* objectness_mask,
DenseTensor* gt_match_mask) {
auto* input = &x;
auto objness_mask = objectness_mask;
float scale = scale_x_y;
......@@ -371,4 +371,4 @@ void Yolov3LossKernel(const Context& dev_ctx,
} // namespace phi
PD_REGISTER_KERNEL(
yolov3_loss, CPU, ALL_LAYOUT, phi::Yolov3LossKernel, float, double) {}
yolo_loss, CPU, ALL_LAYOUT, phi::YoloLossKernel, float, double) {}
......@@ -23,7 +23,7 @@
#include "paddle/phi/kernels/funcs/slice.h"
#include "paddle/phi/kernels/impl/lstsq_kernel_impl.h"
#include "paddle/phi/kernels/impl/qr_kernel_impl.h"
#include "paddle/phi/kernels/impl/tril_triu_kernel_impl.h"
#include "paddle/phi/kernels/impl/tril_kernel_impl.h"
#include "paddle/phi/kernels/lstsq_kernel.h"
#include "paddle/phi/kernels/matmul_kernel.h"
#include "paddle/phi/kernels/transpose_kernel.h"
......@@ -110,7 +110,7 @@ void LstsqKernel(const Context& dev_ctx,
DenseTensor* res_r = new DenseTensor();
res_r->Resize(phi::make_ddim({batch_count, min_mn, min_mn}));
dev_ctx.template Alloc<T>(res_r);
phi::TrilTriuKernel<T>(dev_ctx, slice_r, 0, false, res_r);
phi::TrilKernel<T>(dev_ctx, slice_r, 0, false, res_r);
DenseTensor trans_y = phi::TransposeLast2Dim<T>(dev_ctx, tmp_y);
DenseTensor slice_y =
......@@ -135,7 +135,7 @@ void LstsqKernel(const Context& dev_ctx,
DenseTensor* res_r = new DenseTensor();
res_r->Resize(phi::make_ddim({batch_count, min_mn, min_mn}));
dev_ctx.template Alloc<T>(res_r);
phi::TrilTriuKernel<T>(dev_ctx, slice_r, 0, false, res_r);
phi::TrilKernel<T>(dev_ctx, slice_r, 0, false, res_r);
phi::TriangularSolveKernel<T, Context>(
dev_ctx, *res_r, *new_y, true, true, false, solution);
......
......@@ -25,7 +25,7 @@ namespace cub = hipcub;
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/select_impl.cu.h"
#include "paddle/phi/kernels/where_index_kernel.h"
#include "paddle/phi/kernels/nonzero_kernel.h"
namespace phi {
template <typename MaskT, typename IndexT, typename OutT>
......@@ -62,9 +62,9 @@ struct IndexFunctor {
};
template <typename T, typename Context>
void WhereIndexKernel(const Context &dev_ctx,
const DenseTensor &condition,
DenseTensor *out) {
void NonZeroKernel(const Context &dev_ctx,
const DenseTensor &condition,
DenseTensor *out) {
DenseTensor in_data;
auto dims = condition.dims();
using Functor = IndexFunctor<T, int64_t, int64_t>;
......@@ -74,10 +74,10 @@ void WhereIndexKernel(const Context &dev_ctx,
}
} // namespace phi
PD_REGISTER_KERNEL(where_index,
PD_REGISTER_KERNEL(nonzero,
GPU,
ALL_LAYOUT,
phi::WhereIndexKernel,
phi::NonZeroKernel,
int64_t,
int,
int16_t,
......
......@@ -12,16 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_prod_grad_kernel.h"
#include "paddle/phi/kernels/prod_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h"
#include "paddle/phi/kernels/impl/prod_grad_kernel_impl.h"
PD_REGISTER_KERNEL(prod_grad,
GPU,
ALL_LAYOUT,
phi::ReduceProdGradKernel,
phi::ProdGradKernel,
float,
double,
int,
......
......@@ -31,7 +31,7 @@
#include "paddle/phi/kernels/qr_kernel.h"
#include "paddle/phi/kernels/slice_kernel.h"
#include "paddle/phi/kernels/transpose_kernel.h"
#include "paddle/phi/kernels/tril_triu_kernel.h"
#include "paddle/phi/kernels/tril_kernel.h"
namespace phi {
......@@ -103,12 +103,12 @@ void QrKernel(const Context& ctx,
auto trans_qr = TransposeLast2Dim<T, Context>(ctx, qr);
auto sliced_qr = SliceKernel<T, Context>(
ctx, trans_qr, {trans_qr.dims().size() - 2}, {0}, {min_mn}, {1}, {});
auto tmp_r = TrilTriu<T, Context>(ctx, sliced_qr, 0, false);
auto tmp_r = Tril<T, Context>(ctx, sliced_qr, 0, false);
// Transpose 'tmp_r' to restore the original row-major order
phi::Copy(ctx, tmp_r, r->place(), false, r);
} else {
auto trans_qr = TransposeLast2Dim<T, Context>(ctx, qr);
auto tmp_r = TrilTriu<T, Context>(ctx, trans_qr, 0, false);
auto tmp_r = Tril<T, Context>(ctx, trans_qr, 0, false);
// Transpose 'tmp_r' to restore the original row-major order
phi::Copy(ctx, tmp_r, r->place(), false, r);
}
......
......@@ -14,12 +14,12 @@
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/tril_triu_kernel_impl.h"
#include "paddle/phi/kernels/impl/tril_grad_kernel_impl.h"
PD_REGISTER_KERNEL(tril_triu,
PD_REGISTER_KERNEL(tril_grad,
GPU,
ALL_LAYOUT,
phi::TrilTriuKernel,
phi::TrilGradKernel,
bool,
float,
double,
......
......@@ -14,12 +14,12 @@
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/tril_triu_grad_kernel_impl.h"
#include "paddle/phi/kernels/impl/tril_kernel_impl.h"
PD_REGISTER_KERNEL(tril_triu_grad,
PD_REGISTER_KERNEL(tril,
GPU,
ALL_LAYOUT,
phi::TrilTriuGradKernel,
phi::TrilKernel,
bool,
float,
double,
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/uniform_random_inplace_grad_kernel.h"
#include "paddle/phi/kernels/uniform_inplace_grad_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/full_kernel.h"
......@@ -20,15 +20,15 @@ limitations under the License. */
namespace phi {
template <typename T, typename Context>
void UniformRandomInplaceGradKernel(const Context& ctx,
const DenseTensor& out_grad,
float min,
float max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* x_grad) {
void UniformInplaceGradKernel(const Context& ctx,
const DenseTensor& out_grad,
float min,
float max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* x_grad) {
auto dims = vectorize(x_grad->dims());
float value = static_cast<float>(0.0f);
phi::FullKernel<T>(ctx, dims, value, phi::DataType::UNDEFINED, x_grad);
......@@ -36,9 +36,9 @@ void UniformRandomInplaceGradKernel(const Context& ctx,
} // namespace phi
PD_REGISTER_KERNEL(uniform_random_inplace_grad,
PD_REGISTER_KERNEL(uniform_inplace_grad,
GPU,
ALL_LAYOUT,
phi::UniformRandomInplaceGradKernel,
phi::UniformInplaceGradKernel,
float,
double) {}
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/uniform_random_inplace_kernel.h"
#include "paddle/phi/kernels/uniform_inplace_kernel.h"
#include <thrust/random.h>
......@@ -54,15 +54,15 @@ struct UniformGenerator {
};
template <typename T, typename Context>
void UniformRandomInplaceKernel(const Context& ctx,
const DenseTensor& x,
float min,
float max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* out) {
void UniformInplaceKernel(const Context& ctx,
const DenseTensor& x,
float min,
float max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* out) {
ctx.template Alloc<T>(out);
if (seed == 0) {
// Use global Generator seed
......@@ -80,9 +80,9 @@ void UniformRandomInplaceKernel(const Context& ctx,
} // namespace phi
PD_REGISTER_KERNEL(uniform_random_inplace,
PD_REGISTER_KERNEL(uniform_inplace,
GPU,
ALL_LAYOUT,
phi::UniformRandomInplaceKernel,
phi::UniformInplaceKernel,
float,
double) {}
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/uniform_random_kernel.h"
#include "paddle/phi/kernels/uniform_kernel.h"
#include <thrust/random.h>
......@@ -54,16 +54,16 @@ struct UniformGenerator {
};
template <typename T, typename Context>
void UniformRandomRawKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* out) {
void UniformRawKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* out) {
out->Resize(phi::make_ddim(shape.GetData()));
dev_ctx.template Alloc<T>(out);
if (seed == 0) {
......@@ -86,10 +86,10 @@ void UniformRandomRawKernel(const Context& dev_ctx,
} // namespace phi
PD_REGISTER_KERNEL(uniform_random_raw,
PD_REGISTER_KERNEL(uniform_raw,
GPU,
ALL_LAYOUT,
phi::UniformRandomRawKernel,
phi::UniformRawKernel,
float,
double,
phi::dtype::float16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void HierarchicalSigmoidGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
const DenseTensor& pre_out,
const DenseTensor& out_grad,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* x_grad,
DenseTensor* w_grad,
DenseTensor* bias_grad);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void HierarchicalSigmoidKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* out,
DenseTensor* pre_out,
DenseTensor* w_out);
} // namespace phi
......@@ -19,24 +19,24 @@
namespace phi {
template <typename T, typename Context>
void Yolov3LossGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& gt_box,
const DenseTensor& gt_label,
const paddle::optional<DenseTensor>& gt_score,
const DenseTensor& objectness_mask,
const DenseTensor& gt_match_mask,
const DenseTensor& loss_grad,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_Y,
DenseTensor* x_grad,
DenseTensor* gt_box_grad,
DenseTensor* gt_label_grad,
DenseTensor* gt_score_grad);
void HSigmoidLossGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
const DenseTensor& pre_out,
const DenseTensor& out_grad,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* x_grad,
DenseTensor* w_grad,
DenseTensor* bias_grad);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void HSigmoidLossKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* out,
DenseTensor* pre_out,
DenseTensor* w_out);
} // namespace phi
......@@ -17,19 +17,19 @@
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/impl/reduce_grad.h"
#include "paddle/phi/kernels/reduce_prod_grad_kernel.h"
#include "paddle/phi/kernels/prod_grad_kernel.h"
namespace phi {
template <typename T, typename Context>
void ReduceProdGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out,
const DenseTensor& out_grad,
const IntArray& dims,
bool keep_dim,
bool reduce_all,
DenseTensor* x_grad) {
void ProdGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out,
const DenseTensor& out_grad,
const IntArray& dims,
bool keep_dim,
bool reduce_all,
DenseTensor* x_grad) {
ReduceGradKernel<Context, T, funcs::ProdGradFunctor>(
dev_ctx, x, out, out_grad, dims.GetData(), keep_dim, reduce_all, x_grad);
}
......
......@@ -29,7 +29,7 @@
#include "paddle/phi/kernels/slice_kernel.h"
#include "paddle/phi/kernels/transpose_kernel.h"
#include "paddle/phi/kernels/triangular_solve_kernel.h"
#include "paddle/phi/kernels/tril_triu_kernel.h"
#include "paddle/phi/kernels/tril_kernel.h"
namespace phi {
......@@ -116,8 +116,8 @@ void QrGradKernel(const Context& ctx,
DenseTensor M_tmp1 = Subtract<T, Context>(ctx, R_term, Q_term);
// Compute M = (tril(M) + tril(M).mH()) * 0.5 Identity
DenseTensor M_tril_0 = TrilTriu<T, Context>(ctx, M_tmp1, 0, true);
DenseTensor M_tril_1 = TrilTriu<T, Context>(ctx, M_tmp1, -1, true);
DenseTensor M_tril_0 = Tril<T, Context>(ctx, M_tmp1, 0, true);
DenseTensor M_tril_1 = Tril<T, Context>(ctx, M_tmp1, -1, true);
DenseTensor M = Add<T, Context>(
ctx, M_tril_0, TransposeLast2Dim<T, Context>(ctx, M_tril_1));
......
......@@ -16,16 +16,16 @@
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/tril_triu_compute.h"
#include "paddle/phi/kernels/tril_triu_grad_kernel.h"
#include "paddle/phi/kernels/tril_grad_kernel.h"
namespace phi {
template <typename T, typename Context>
void TrilTriuGradKernel(const Context& ctx,
const DenseTensor& out_grad,
int diagonal,
bool lower,
DenseTensor* x_grad) {
void TrilGradKernel(const Context& ctx,
const DenseTensor& out_grad,
int diagonal,
bool lower,
DenseTensor* x_grad) {
const auto* dout_data = out_grad.data<T>();
auto* dx_data = ctx.template Alloc<T>(x_grad);
......
......@@ -16,16 +16,16 @@
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/tril_triu_compute.h"
#include "paddle/phi/kernels/tril_triu_kernel.h"
#include "paddle/phi/kernels/tril_kernel.h"
namespace phi {
template <typename T, typename Context>
void TrilTriuKernel(const Context& ctx,
const DenseTensor& x,
int diagonal,
bool lower,
DenseTensor* out) {
void TrilKernel(const Context& ctx,
const DenseTensor& x,
int diagonal,
bool lower,
DenseTensor* out) {
const auto* x_data = x.data<T>();
auto* out_data = ctx.template Alloc<T>(out);
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_prod_kernel.h"
#include "paddle/phi/kernels/prod_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/gpu/reduce.h"
......
......@@ -19,8 +19,8 @@
namespace phi {
template <typename T, typename Context>
void WhereIndexKernel(const Context& dev_ctx,
const DenseTensor& condition,
DenseTensor* out);
void NonZeroKernel(const Context& dev_ctx,
const DenseTensor& condition,
DenseTensor* out);
} // namespace phi
......@@ -20,12 +20,12 @@
namespace phi {
template <typename T, typename Context>
void ReduceProdGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out,
const DenseTensor& out_grad,
const IntArray& dims,
bool keep_dim,
bool reduce_all,
DenseTensor* x_grad);
void ProdGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out,
const DenseTensor& out_grad,
const IntArray& dims,
bool keep_dim,
bool reduce_all,
DenseTensor* x_grad);
} // namespace phi
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_prod_kernel.h"
#include "paddle/phi/kernels/prod_kernel.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/core/kernel_registry.h"
......
......@@ -12,12 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/selected_rows/hierarchical_sigmoid_grad_kernel.h"
#include "paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/hierarchical_sigmoid_grad.h"
#include "paddle/phi/kernels/cpu/hsigmoid_loss_grad.h"
namespace phi {
namespace sr {
......@@ -36,25 +36,25 @@ static std::vector<int64_t> PathToRows(const DenseTensor& path) {
}
template <typename T, typename Context>
void HierarchicalSigmoidGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
const DenseTensor& pre_out,
const DenseTensor& out_grad,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* x_grad,
SelectedRows* w_grad,
DenseTensor* bias_grad) {
void HSigmoidLossGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
const DenseTensor& pre_out,
const DenseTensor& out_grad,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* x_grad,
SelectedRows* w_grad,
DenseTensor* bias_grad) {
PADDLE_ENFORCE_NOT_NULL(
path.get_ptr(),
errors::NotFound("Custom tree must be set for sparse mode!"));
......@@ -66,34 +66,34 @@ void HierarchicalSigmoidGradKernel(const Context& ctx,
phi::DDim temp_dim(w.dims());
temp_dim[0] = real_rows.size();
w_grad_value->Resize(temp_dim);
phi::HierarchicalSigmoidGradKernelImpl<T>(ctx,
x,
w,
label,
path,
code,
bias,
pre_out,
out_grad,
num_classes,
remote_prefetch,
trainer_id,
height_sections,
epmap,
table_names,
is_sparse,
x_grad,
w_grad_value,
bias_grad,
w_grad);
phi::HSigmoidLossGradKernelImpl<T>(ctx,
x,
w,
label,
path,
code,
bias,
pre_out,
out_grad,
num_classes,
remote_prefetch,
trainer_id,
height_sections,
epmap,
table_names,
is_sparse,
x_grad,
w_grad_value,
bias_grad,
w_grad);
}
} // namespace sr
} // namespace phi
PD_REGISTER_KERNEL(hierarchical_sigmoid_grad_sr,
PD_REGISTER_KERNEL(hsigmoid_loss_grad_sr,
CPU,
ALL_LAYOUT,
phi::sr::HierarchicalSigmoidGradKernel,
phi::sr::HSigmoidLossGradKernel,
float,
double) {}
......@@ -21,25 +21,25 @@ namespace phi {
namespace sr {
template <typename T, typename Context>
void HierarchicalSigmoidGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
const DenseTensor& pre_out,
const DenseTensor& out_grad,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* x_grad,
SelectedRows* w_grad,
DenseTensor* bias_grad);
void HSigmoidLossGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& w,
const DenseTensor& label,
const paddle::optional<DenseTensor>& path,
const paddle::optional<DenseTensor>& code,
const paddle::optional<DenseTensor>& bias,
const DenseTensor& pre_out,
const DenseTensor& out_grad,
int num_classes,
bool remote_prefetch,
int trainer_id,
const std::vector<int64_t>& height_sections,
const std::vector<std::string>& epmap,
const std::vector<std::string>& table_names,
bool is_sparse,
DenseTensor* x_grad,
SelectedRows* w_grad,
DenseTensor* bias_grad);
} // namespace sr
} // namespace phi
......@@ -12,95 +12,85 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/selected_rows/uniform_random_kernel.h"
#include "paddle/phi/kernels/selected_rows/uniform_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/uniform_random_kernel.h"
#include "paddle/phi/kernels/uniform_kernel.h"
namespace phi {
namespace sr {
template <typename T, typename Context>
void UniformRandomRawKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
int diag_num,
int diag_step,
float diag_val,
SelectedRows* out) {
phi::UniformRandomRawKernel<T>(dev_ctx,
shape,
dtype,
min,
max,
seed,
diag_num,
diag_step,
diag_val,
out->mutable_value());
void UniformRawKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
int diag_num,
int diag_step,
float diag_val,
SelectedRows* out) {
phi::UniformRawKernel<T>(dev_ctx,
shape,
dtype,
min,
max,
seed,
diag_num,
diag_step,
diag_val,
out->mutable_value());
}
template <typename T, typename Context>
void UniformRandomKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
SelectedRows* out) {
phi::UniformRandomKernel<T>(
void UniformKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
SelectedRows* out) {
phi::UniformKernel<T>(
dev_ctx, shape, dtype, min, max, seed, out->mutable_value());
}
} // namespace sr
} // namespace phi
PD_REGISTER_KERNEL(uniform_random_raw_sr,
PD_REGISTER_KERNEL(uniform_raw_sr,
CPU,
ALL_LAYOUT,
phi::sr::UniformRandomRawKernel,
phi::sr::UniformRawKernel,
float,
double,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(uniform_random_sr,
PD_REGISTER_KERNEL(uniform_sr,
CPU,
ALL_LAYOUT,
phi::sr::UniformRandomKernel,
phi::sr::UniformKernel,
float,
double,
phi::dtype::bfloat16) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(uniform_random_raw_sr,
GPU,
ALL_LAYOUT,
phi::sr::UniformRandomRawKernel,
float,
double) {}
PD_REGISTER_KERNEL(
uniform_raw_sr, GPU, ALL_LAYOUT, phi::sr::UniformRawKernel, float, double) {
}
PD_REGISTER_KERNEL(uniform_random_sr,
GPU,
ALL_LAYOUT,
phi::sr::UniformRandomKernel,
float,
double) {}
PD_REGISTER_KERNEL(
uniform_sr, GPU, ALL_LAYOUT, phi::sr::UniformKernel, float, double) {}
#endif
#if defined(PADDLE_WITH_XPU)
PD_REGISTER_KERNEL(uniform_random_raw_sr,
XPU,
ALL_LAYOUT,
phi::sr::UniformRandomRawKernel,
float) {}
PD_REGISTER_KERNEL(
uniform_random_sr, XPU, ALL_LAYOUT, phi::sr::UniformRandomKernel, float) {}
uniform_raw_sr, XPU, ALL_LAYOUT, phi::sr::UniformRawKernel, float) {}
PD_REGISTER_KERNEL(uniform_sr, XPU, ALL_LAYOUT, phi::sr::UniformKernel, float) {
}
#endif
......@@ -22,25 +22,25 @@ namespace phi {
namespace sr {
template <typename T, typename Context>
void UniformRandomRawKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
int diag_num,
int diag_step,
float diag_val,
SelectedRows* out);
void UniformRawKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
int diag_num,
int diag_step,
float diag_val,
SelectedRows* out);
template <typename T, typename Context>
void UniformRandomKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
SelectedRows* out);
void UniformKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
SelectedRows* out);
} // namespace sr
} // namespace phi
......@@ -19,10 +19,10 @@
namespace phi {
template <typename T, typename Context>
void TrilTriuGradKernel(const Context& ctx,
const DenseTensor& out_grad,
int diagonal,
bool lower,
DenseTensor* x_grad);
void TrilGradKernel(const Context& ctx,
const DenseTensor& out_grad,
int diagonal,
bool lower,
DenseTensor* x_grad);
} // namespace phi
......@@ -20,21 +20,21 @@
namespace phi {
template <typename T, typename Context>
void TrilTriuKernel(const Context& ctx,
const DenseTensor& x,
int diagonal,
bool lower,
DenseTensor* out);
void TrilKernel(const Context& ctx,
const DenseTensor& x,
int diagonal,
bool lower,
DenseTensor* out);
template <typename T, typename Context>
DenseTensor TrilTriu(const Context& ctx,
const DenseTensor& x,
int diagonal,
bool lower) {
DenseTensor Tril(const Context& ctx,
const DenseTensor& x,
int diagonal,
bool lower) {
DenseTensor dense_out;
MetaTensor meta_out(&dense_out);
TrilTriuInferMeta(x, diagonal, lower, &meta_out);
TrilTriuKernel<T, Context>(ctx, x, diagonal, lower, &dense_out);
TrilInferMeta(x, diagonal, lower, &meta_out);
TrilKernel<T, Context>(ctx, x, diagonal, lower, &dense_out);
return dense_out;
}
......
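The functional Tril helper declared above is what call sites such as QrGradKernel now use in place of TrilTriu. A minimal usage sketch, assuming a DenseTensor m that already lives on the kernel's device (the tensor name is illustrative only):
// Take the lower-triangular part of m, keeping the main diagonal
// (diagonal = 0, lower = true); returns a new DenseTensor.
DenseTensor m_lower = phi::Tril<T, Context>(ctx, m, /*diagonal=*/0, /*lower=*/true);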
......@@ -19,14 +19,14 @@ limitations under the License. */
namespace phi {
template <typename T, typename Context>
void UniformRandomInplaceKernel(const Context& ctx,
const DenseTensor& x,
float min,
float max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* out);
void UniformInplaceGradKernel(const Context& ctx,
const DenseTensor& out_grad,
float min,
float max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* x_grad);
} // namespace phi
......@@ -19,14 +19,14 @@ limitations under the License. */
namespace phi {
template <typename T, typename Context>
void UniformRandomInplaceGradKernel(const Context& ctx,
const DenseTensor& out_grad,
float min,
float max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* x_grad);
void UniformInplaceKernel(const Context& ctx,
const DenseTensor& x,
float min,
float max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* out);
} // namespace phi
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/uniform_random_kernel.h"
#include "paddle/phi/kernels/uniform_kernel.h"
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/common/scalar.h"
......@@ -29,38 +29,36 @@
namespace phi {
template <typename T, typename Context>
void UniformRandomKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
DenseTensor* out) {
UniformRandomRawKernel<T>(
dev_ctx, shape, dtype, min, max, seed, 0, 0, 0.0f, out);
void UniformKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
DenseTensor* out) {
UniformRawKernel<T>(dev_ctx, shape, dtype, min, max, seed, 0, 0, 0.0f, out);
}
} // namespace phi
PD_REGISTER_KERNEL(uniform_random,
PD_REGISTER_KERNEL(uniform,
CPU,
ALL_LAYOUT,
phi::UniformRandomKernel,
phi::UniformKernel,
float,
double,
phi::dtype::bfloat16) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(uniform_random,
PD_REGISTER_KERNEL(uniform,
GPU,
ALL_LAYOUT,
phi::UniformRandomKernel,
phi::UniformKernel,
float,
double,
phi::dtype::float16) {}
#endif
#ifdef PADDLE_WITH_XPU
PD_REGISTER_KERNEL(
uniform_random, XPU, ALL_LAYOUT, phi::UniformRandomKernel, float) {}
PD_REGISTER_KERNEL(uniform, XPU, ALL_LAYOUT, phi::UniformKernel, float) {}
#endif
......@@ -22,24 +22,24 @@
namespace phi {
template <typename T, typename Context>
void UniformRandomRawKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* out);
void UniformRawKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor* out);
template <typename T, typename Context>
void UniformRandomKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
DenseTensor* out);
void UniformKernel(const Context& dev_ctx,
const IntArray& shape,
DataType dtype,
const Scalar& min,
const Scalar& max,
int seed,
DenseTensor* out);
} // namespace phi
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/where_index_kernel.h"
#include "paddle/phi/kernels/nonzero_kernel.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
......@@ -22,9 +22,9 @@
namespace phi {
template <typename T, typename Context>
void WhereIndexKernel(const Context& dev_ctx,
const DenseTensor& condition,
DenseTensor* out) {
void NonZeroKernel(const Context& dev_ctx,
const DenseTensor& condition,
DenseTensor* out) {
const T* cond_data = condition.data<T>();
auto numel = condition.numel();
auto dims = condition.dims();
......@@ -69,4 +69,4 @@ void WhereIndexKernel(const Context& dev_ctx,
} // namespace phi
PD_REGISTER_KERNEL(
where_index, XPU, ALL_LAYOUT, phi::WhereIndexKernel, int, bool, float) {}
nonzero, XPU, ALL_LAYOUT, phi::NonZeroKernel, int, bool, float) {}
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_prod_kernel.h"
#include "paddle/phi/kernels/prod_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/tril_triu_grad_kernel.h"
#include "paddle/phi/kernels/tril_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
......@@ -20,11 +20,11 @@
namespace phi {
template <typename T, typename Context>
void TrilTriuGradKernel(const Context& ctx,
const DenseTensor& out_grad,
int diagonal,
bool lower,
DenseTensor* x_grad) {
void TrilGradKernel(const Context& ctx,
const DenseTensor& out_grad,
int diagonal,
bool lower,
DenseTensor* x_grad) {
using XPUType = typename XPUTypeTrait<T>::Type;
ctx.template Alloc<T>(x_grad);
auto dy_shape = vectorize<int>(out_grad.dims());
......@@ -49,4 +49,4 @@ void TrilTriuGradKernel(const Context& ctx,
} // namespace phi
PD_REGISTER_KERNEL(
tril_triu_grad, XPU, ALL_LAYOUT, phi::TrilTriuGradKernel, int, float) {}
tril_grad, XPU, ALL_LAYOUT, phi::TrilGradKernel, int, float) {}
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/tril_triu_kernel.h"
#include "paddle/phi/kernels/tril_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
......@@ -20,11 +20,11 @@
namespace phi {
template <typename T, typename Context>
void TrilTriuKernel(const Context& ctx,
const DenseTensor& x,
int diagonal,
bool lower,
DenseTensor* out) {
void TrilKernel(const Context& ctx,
const DenseTensor& x,
int diagonal,
bool lower,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
ctx.template Alloc<T>(out);
auto xshape = vectorize<int>(x.dims());
......@@ -48,5 +48,4 @@ void TrilTriuKernel(const Context& ctx,
} // namespace phi
PD_REGISTER_KERNEL(
tril_triu, XPU, ALL_LAYOUT, phi::TrilTriuKernel, int, float) {}
PD_REGISTER_KERNEL(tril, XPU, ALL_LAYOUT, phi::TrilKernel, int, float) {}
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/uniform_random_kernel.h"
#include "paddle/phi/kernels/uniform_kernel.h"
#include <string>
......@@ -24,16 +24,16 @@ limitations under the License. */
namespace phi {
template <typename T, typename Context>
void UniformRandomRawKernel(const Context &dev_ctx,
const IntArray &shape,
DataType dtype,
const Scalar &min,
const Scalar &max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor *out) {
void UniformRawKernel(const Context &dev_ctx,
const IntArray &shape,
DataType dtype,
const Scalar &min,
const Scalar &max,
int seed,
int diag_num,
int diag_step,
float diag_val,
DenseTensor *out) {
out->Resize(phi::make_ddim(shape.GetData()));
T *data = dev_ctx.template Alloc<T>(out);
int64_t size = out->numel();
......@@ -76,5 +76,5 @@ void UniformRandomRawKernel(const Context &dev_ctx,
} // namespace phi
PD_REGISTER_KERNEL(
uniform_random_raw, XPU, ALL_LAYOUT, phi::UniformRandomRawKernel, float) {}
PD_REGISTER_KERNEL(uniform_raw, XPU, ALL_LAYOUT, phi::UniformRawKernel, float) {
}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void YoloLossGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& gt_box,
const DenseTensor& gt_label,
const paddle::optional<DenseTensor>& gt_score,
const DenseTensor& objectness_mask,
const DenseTensor& gt_match_mask,
const DenseTensor& loss_grad,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
DenseTensor* x_grad,
DenseTensor* gt_box_grad,
DenseTensor* gt_label_grad,
DenseTensor* gt_score_grad);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void YoloLossKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& gt_box,
const DenseTensor& gt_label,
const paddle::optional<DenseTensor>& gt_score,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
DenseTensor* loss,
DenseTensor* objectness_mask,
DenseTensor* gt_match_mask);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void Yolov3LossKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& gt_box,
const DenseTensor& gt_label,
const paddle::optional<DenseTensor>& gt_score,
const std::vector<int>& anchors,
const std::vector<int>& anchor_mask,
int class_num,
float ignore_thresh,
int downsample_ratio,
bool use_label_smooth,
float scale_x_y,
DenseTensor* loss,
DenseTensor* objectness_mask,
DenseTensor* gt_match_mask);
} // namespace phi
......@@ -18,7 +18,7 @@ namespace phi {
KernelSignature HierarchicalSigmoidOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("hierarchical_sigmoid",
return KernelSignature("hsigmoid_loss",
{"X", "W", "Label", "PathTable", "PathCode", "Bias"},
{"num_classes",
"remote_prefetch",
......@@ -33,7 +33,7 @@ KernelSignature HierarchicalSigmoidOpArgumentMapping(
KernelSignature HierarchicalSigmoidGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorOutput("W@GRAD")) {
return KernelSignature("hierarchical_sigmoid_grad",
return KernelSignature("hsigmoid_loss_grad",
{"X",
"W",
"Label",
......@@ -51,7 +51,7 @@ KernelSignature HierarchicalSigmoidGradOpArgumentMapping(
"is_sparse"},
{"X@GRAD", "W@GRAD", "Bias@GRAD"});
} else if (ctx.IsSelectedRowsOutput("W@GRAD")) {
return KernelSignature("hierarchical_sigmoid_grad_sr",
return KernelSignature("hsigmoid_loss_grad_sr",
{"X",
"W",
"Label",
......@@ -75,6 +75,9 @@ KernelSignature HierarchicalSigmoidGradOpArgumentMapping(
} // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(hierarchical_sigmoid, hsigmoid_loss);
PD_REGISTER_BASE_KERNEL_NAME(hierarchical_sigmoid_grad, hsigmoid_loss_grad);
PD_REGISTER_ARG_MAPPING_FN(hierarchical_sigmoid,
phi::HierarchicalSigmoidOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(hierarchical_sigmoid_grad,
......
......@@ -17,16 +17,19 @@ limitations under the License. */
namespace phi {
KernelSignature TrilTriuOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("tril_triu", {"X"}, {"diagonal", "lower"}, {"Out"});
return KernelSignature("tril", {"X"}, {"diagonal", "lower"}, {"Out"});
}
KernelSignature TrilTriuGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"tril_triu_grad", {"Out@GRAD"}, {"diagonal", "lower"}, {"X@GRAD"});
"tril_grad", {"Out@GRAD"}, {"diagonal", "lower"}, {"X@GRAD"});
}
} // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(tril_triu, tril);
PD_REGISTER_BASE_KERNEL_NAME(tril_triu_grad, tril_grad);
PD_REGISTER_ARG_MAPPING_FN(tril_triu, phi::TrilTriuOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(tril_triu_grad, phi::TrilTriuGradOpArgumentMapping);
......@@ -18,7 +18,7 @@ namespace phi {
KernelSignature UniformRandomInplaceOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"uniform_random_inplace",
"uniform_inplace",
{"X"},
{"min", "max", "seed", "diag_num", "diag_step", "diag_val"},
{"Out"});
......@@ -27,7 +27,7 @@ KernelSignature UniformRandomInplaceOpArgumentMapping(
KernelSignature UniformRandomInplaceGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"uniform_random_inplace_grad",
"uniform_inplace_grad",
{"Out@GRAD"},
{"min", "max", "seed", "diag_num", "diag_step", "diag_val"},
{"X@GRAD"});
......@@ -35,6 +35,8 @@ KernelSignature UniformRandomInplaceGradOpArgumentMapping(
} // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(uniform_random_inplace, uniform_inplace);
PD_REGISTER_ARG_MAPPING_FN(uniform_random_inplace,
phi::UniformRandomInplaceOpArgumentMapping);
......
......@@ -22,7 +22,7 @@ KernelSignature UniformRandomOpArgumentMapping(
if (ctx.IsDenseTensorOutput("Out")) {
if (diag_num) {
if (ctx.InputSize("ShapeTensorList") > 0) {
return KernelSignature("uniform_random_raw",
return KernelSignature("uniform_raw",
{},
{"ShapeTensorList",
"dtype",
......@@ -37,7 +37,7 @@ KernelSignature UniformRandomOpArgumentMapping(
const auto& shape =
paddle::any_cast<std::vector<int64_t>>(ctx.Attr("shape"));
if (ctx.HasInput("ShapeTensor") && shape.empty()) {
return KernelSignature("uniform_random_raw",
return KernelSignature("uniform_raw",
{},
{"ShapeTensor",
"dtype",
......@@ -49,7 +49,7 @@ KernelSignature UniformRandomOpArgumentMapping(
"diag_val"},
{"Out"});
} else {
return KernelSignature("uniform_random_raw",
return KernelSignature("uniform_raw",
{},
{"shape",
"dtype",
......@@ -65,7 +65,7 @@ KernelSignature UniformRandomOpArgumentMapping(
} else {
if (ctx.InputSize("ShapeTensorList") > 0) {
return KernelSignature(
"uniform_random",
"uniform",
{},
{"ShapeTensorList", "dtype", "min", "max", "seed"},
{"Out"});
......@@ -73,22 +73,20 @@ KernelSignature UniformRandomOpArgumentMapping(
const auto& shape =
paddle::any_cast<std::vector<int64_t>>(ctx.Attr("shape"));
if (ctx.HasInput("ShapeTensor") && shape.empty()) {
return KernelSignature("uniform_random",
return KernelSignature("uniform",
{},
{"ShapeTensor", "dtype", "min", "max", "seed"},
{"Out"});
} else {
return KernelSignature("uniform_random",
{},
{"shape", "dtype", "min", "max", "seed"},
{"Out"});
return KernelSignature(
"uniform", {}, {"shape", "dtype", "min", "max", "seed"}, {"Out"});
}
}
}
} else if (ctx.IsSelectedRowsOutput("Out")) {
if (diag_num) {
if (ctx.InputSize("ShapeTensorList") > 0) {
return KernelSignature("uniform_random_raw_sr",
return KernelSignature("uniform_raw_sr",
{},
{"ShapeTensorList",
"dtype",
......@@ -103,7 +101,7 @@ KernelSignature UniformRandomOpArgumentMapping(
const auto& shape =
paddle::any_cast<std::vector<int64_t>>(ctx.Attr("shape"));
if (ctx.HasInput("ShapeTensor") && shape.empty()) {
return KernelSignature("uniform_random_raw_sr",
return KernelSignature("uniform_raw_sr",
{},
{"ShapeTensor",
"dtype",
......@@ -115,7 +113,7 @@ KernelSignature UniformRandomOpArgumentMapping(
"diag_val"},
{"Out"});
} else {
return KernelSignature("uniform_random_raw_sr",
return KernelSignature("uniform_raw_sr",
{},
{"shape",
"dtype",
......@@ -131,7 +129,7 @@ KernelSignature UniformRandomOpArgumentMapping(
} else {
if (ctx.InputSize("ShapeTensorList") > 0) {
return KernelSignature(
"uniform_random_sr",
"uniform_sr",
{},
{"ShapeTensorList", "dtype", "min", "max", "seed"},
{"Out"});
......@@ -139,12 +137,12 @@ KernelSignature UniformRandomOpArgumentMapping(
const auto& shape =
paddle::any_cast<std::vector<int64_t>>(ctx.Attr("shape"));
if (ctx.HasInput("ShapeTensor") && shape.empty()) {
return KernelSignature("uniform_random_sr",
return KernelSignature("uniform_sr",
{},
{"ShapeTensor", "dtype", "min", "max", "seed"},
{"Out"});
} else {
return KernelSignature("uniform_random_sr",
return KernelSignature("uniform_sr",
{},
{"shape", "dtype", "min", "max", "seed"},
{"Out"});
......@@ -156,4 +154,6 @@ KernelSignature UniformRandomOpArgumentMapping(
}
} // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(uniform_random, uniform);
PD_REGISTER_ARG_MAPPING_FN(uniform_random, phi::UniformRandomOpArgumentMapping);
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature WhereIndexOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("nonzero", {"Condition"}, {}, {"Out"});
}
} // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(where_index, nonzero);
PD_REGISTER_ARG_MAPPING_FN(where_index, phi::WhereIndexOpArgumentMapping);
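The where_index mapping above is the simplest instance of the compatibility pattern applied throughout this change: the legacy operator keeps its fluid name, while PD_REGISTER_BASE_KERNEL_NAME and the argument mapping both point at the standardized PHI kernel name. A minimal sketch of the same pattern for a hypothetical legacy op foo_bar renamed to foo (all names here are illustrative, not part of this diff):
namespace phi {
KernelSignature FooBarOpArgumentMapping(const ArgumentMappingContext& ctx) {
  // Map the legacy inputs/attrs/outputs onto the renamed PHI kernel name.
  return KernelSignature("foo", {"X"}, {"axis"}, {"Out"});
}
}  // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(foo_bar, foo);
PD_REGISTER_ARG_MAPPING_FN(foo_bar, phi::FooBarOpArgumentMapping);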
......@@ -17,7 +17,7 @@
namespace phi {
KernelSignature Yolov3LossOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("yolov3_loss",
return KernelSignature("yolo_loss",
{"X", "GTBox", "GTLabel", "GTScore"},
{"anchors",
"anchor_mask",
......@@ -32,7 +32,7 @@ KernelSignature Yolov3LossOpArgumentMapping(const ArgumentMappingContext& ctx) {
KernelSignature Yolov3LossGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"yolov3_loss_grad",
"yolo_loss_grad",
{"X",
"GTBox",
"GTLabel",
......@@ -51,6 +51,9 @@ KernelSignature Yolov3LossGradOpArgumentMapping(
}
} // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(yolov3_loss, yolo_loss);
PD_REGISTER_BASE_KERNEL_NAME(yolov3_loss_grad, yolo_loss_grad);
PD_REGISTER_ARG_MAPPING_FN(yolov3_loss, phi::Yolov3LossOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(yolov3_loss_grad,
phi::Yolov3LossGradOpArgumentMapping);
......@@ -309,7 +309,7 @@ class UniformInitializer(Initializer):
if framework._non_static_mode():
if in_dygraph_mode():
out_var = _C_ops.uniform_random(
out_var = _C_ops.uniform(
var.shape,
out_dtype,
self._low,
......@@ -711,7 +711,7 @@ class XavierInitializer(Initializer):
if self._uniform:
limit = math.sqrt(6.0 / float(fan_in + fan_out))
if in_dygraph_mode():
out_var = _C_ops.uniform_random(
out_var = _C_ops.uniform(
out_var.shape,
out_dtype,
-limit,
......@@ -923,7 +923,7 @@ class MSRAInitializer(Initializer):
gain = calculate_gain(self._nonlinearity, self._negative_slope)
limit = gain * math.sqrt(3.0 / float(fan_in))
if in_dygraph_mode():
out_var = _C_ops.uniform_random(
out_var = _C_ops.uniform(
var.shape,
out_dtype,
-limit,
......
......@@ -5385,7 +5385,7 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None):
)
)
if in_dygraph_mode():
return _C_ops.reduce_prod(
return _C_ops.prod(
input,
dim if dim != None and dim != [] else [0],
keep_dim,
......@@ -15548,7 +15548,7 @@ def where(condition):
"""
if in_dygraph_mode():
return _C_ops.where_index(condition)
return _C_ops.nonzero(condition)
if _in_legacy_dygraph():
return _legacy_C_ops.where_index(condition)
......@@ -16567,7 +16567,7 @@ def uniform_random(
if in_dygraph_mode():
shape = utils.convert_shape_to_list(shape)
return _C_ops.uniform_random(
return _C_ops.uniform(
shape,
dtype,
float(min),
......
......@@ -1017,7 +1017,7 @@ def hsigmoid_loss(
# [1.92374969]]
"""
if in_dygraph_mode():
out, _, _ = _C_ops.hierarchical_sigmoid(
out, _, _ = _C_ops.hsigmoid_loss(
input,
weight,
label,
......
......@@ -1097,7 +1097,7 @@ def tril(x, diagonal=0, name=None):
# [9 , 10, 0 , 0 ]])
"""
if in_dygraph_mode():
return _C_ops.tril_triu(x, diagonal, True)
return _C_ops.tril(x, diagonal, True)
if _in_legacy_dygraph():
op = getattr(_legacy_C_ops, 'tril_triu')
......@@ -1163,7 +1163,7 @@ def triu(x, diagonal=0, name=None):
"""
if in_dygraph_mode():
return _C_ops.tril_triu(x, diagonal, False)
return _C_ops.tril(x, diagonal, False)
if _in_legacy_dygraph():
op = getattr(_legacy_C_ops, 'tril_triu')
......
......@@ -3706,7 +3706,7 @@ def prod(x, axis=None, keepdim=False, dtype=None, name=None):
dim = [0]
if in_dygraph_mode():
return _C_ops.reduce_prod(x, dim, keepdim, reduce_all)
return _C_ops.prod(x, dim, keepdim, reduce_all)
if _in_legacy_dygraph():
return _legacy_C_ops.reduce_prod(
x, 'dim', dim, 'keep_dim', keepdim, 'reduce_all', reduce_all
......
......@@ -584,7 +584,7 @@ def uniform(shape, dtype=None, min=-1.0, max=1.0, seed=0, name=None):
if in_dygraph_mode():
shape = utils.convert_shape_to_list(shape)
return _C_ops.uniform_random(
return _C_ops.uniform(
shape,
dtype,
float(min),
......@@ -664,7 +664,7 @@ def uniform_(x, min=-1.0, max=1.0, seed=0, name=None):
# [ 0.433519, 0.39483607, -0.8660099, 0.83664286]] # random
"""
if in_dygraph_mode():
return _C_ops.uniform_random_inplace_(x, min, max, seed, 0, 0, 1.0)
return _C_ops.uniform_inplace_(x, min, max, seed, 0, 0, 1.0)
else:
return _legacy_C_ops.uniform_random_inplace_(
x, 'min', min, 'max', max, 'seed', seed
......
......@@ -429,7 +429,7 @@ def nonzero(x, as_tuple=False):
rank = len(shape)
if in_dygraph_mode():
outs = _C_ops.where_index(x)
outs = _C_ops.nonzero(x)
elif paddle.in_dynamic_mode():
outs = _legacy_C_ops.where_index(x)
else:
......
......@@ -197,7 +197,7 @@ def yolo_loss(
"""
if in_dygraph_mode():
loss, _, _ = _C_ops.yolov3_loss(
loss, _, _ = _C_ops.yolo_loss(
x,
gt_box,
gt_label,
......