From 399047d7f113318d7f2a16dff3b560bdacb60c5f Mon Sep 17 00:00:00 2001
From: YuanRisheng <yuanrisheng@baidu.com>
Date: Tue, 1 Nov 2022 18:41:05 +0800
Subject: [PATCH] [PHI]Standardise some C++ API (Part2) (#47510)

* standard_api

* add hardtanh
---
 .../new_executor/standalone_executor_test.cc  |   4 +-
 .../operators/detection/yolov3_loss_op.cc     |   4 +-
 .../operators/hierarchical_sigmoid_op.cc      |   2 +-
 paddle/fluid/operators/tril_triu_op.cc        |   2 +-
 paddle/fluid/operators/where_index_op.cc      |   2 +-
 paddle/phi/api/yaml/legacy_backward.yaml      |  46 +-
 paddle/phi/api/yaml/legacy_ops.yaml           |  92 +--
 paddle/phi/infermeta/backward.cc              |  36 +-
 paddle/phi/infermeta/backward.h               |  36 +-
 paddle/phi/infermeta/multiary.cc              |  60 +-
 paddle/phi/infermeta/multiary.h               |  60 +-
 paddle/phi/infermeta/ternary.cc               | 116 ++--
 paddle/phi/infermeta/ternary.h                |  16 +-
 paddle/phi/infermeta/unary.cc                 | 554 +++++++++---------
 paddle/phi/infermeta/unary.h                  |  58 +-
 .../cpu/hierarchical_sigmoid_grad_kernel.cc   |  71 ---
 ...al_sigmoid_grad.h => hsigmoid_loss_grad.h} |  41 +-
 .../kernels/cpu/hsigmoid_loss_grad_kernel.cc  |  71 +++
 ...moid_kernel.cc => hsigmoid_loss_kernel.cc} |  44 +-
 ...here_index_kernel.cc => nonzero_kernel.cc} |  12 +-
 ...rod_grad_kernel.cc => prod_grad_kernel.cc} |   6 +-
 .../{reduce_prod_kernel.cc => prod_kernel.cc} |   2 +-
 ...ril_triu_kernel.cc => tril_grad_kernel.cc} |   6 +-
 ...ril_triu_grad_kernel.cc => tril_kernel.cc} |   6 +-
 ...rnel.cc => uniform_inplace_grad_kernel.cc} |  24 +-
 ...ce_kernel.cc => uniform_inplace_kernel.cc} |  24 +-
 ...orm_random_kernel.cc => uniform_kernel.cc} |  26 +-
 ...ov3_loss_functor.h => yolo_loss_functor.h} |   0
 ...rad_kernel.cc => yolo_loss_grad_kernel.cc} |  50 +-
 ...ov3_loss_kernel.cc => yolo_loss_kernel.cc} |  36 +-
 paddle/phi/kernels/gpu/lstsq_kernel.cu        |   6 +-
 ...here_index_kernel.cu => nonzero_kernel.cu} |  12 +-
 ...rod_grad_kernel.cu => prod_grad_kernel.cu} |   6 +-
 paddle/phi/kernels/gpu/qr_kernel.cu           |   6 +-
 ...ril_triu_kernel.cu => tril_grad_kernel.cu} |   6 +-
 ...ril_triu_grad_kernel.cu => tril_kernel.cu} |   6 +-
 ...rnel.cu => uniform_inplace_grad_kernel.cu} |  24 +-
 ...ce_kernel.cu => uniform_inplace_kernel.cu} |  24 +-
 ...orm_random_kernel.cu => uniform_kernel.cu} |  26 +-
 .../hierarchical_sigmoid_grad_kernel.h        |  42 --
 .../phi/kernels/hierarchical_sigmoid_kernel.h |  40 --
 .../phi/kernels/hsigmoid_loss_grad_kernel.h   |  42 ++
 paddle/phi/kernels/hsigmoid_loss_kernel.h     |  40 ++
 ..._kernel_impl.h => prod_grad_kernel_impl.h} |  18 +-
 paddle/phi/kernels/impl/qr_grad_kernel_impl.h |   6 +-
 ..._kernel_impl.h => tril_grad_kernel_impl.h} |  12 +-
 ..._triu_kernel_impl.h => tril_kernel_impl.h} |  12 +-
 .../{reduce_prod_kernel.cu => prod_kernel.cu} |   2 +-
 ...{where_index_kernel.h => nonzero_kernel.h} |   6 +-
 ..._prod_grad_kernel.h => prod_grad_kernel.h} |  16 +-
 .../{reduce_prod_kernel.cc => prod_kernel.cc} |   2 +-
 .../{reduce_prod_kernel.h => prod_kernel.h}   |   0
 .../hierarchical_sigmoid_grad_kernel.cc       |  99 ----
 .../hierarchical_sigmoid_grad_kernel.h        |  45 --
 .../hsigmoid_loss_grad_kernel.cc              |  99 ++++
 .../selected_rows/hsigmoid_loss_grad_kernel.h |  45 ++
 .../kernels/selected_rows/uniform_kernel.cc   |  96 +++
 ...iform_random_kernel.h => uniform_kernel.h} |  34 +-
 .../selected_rows/uniform_random_kernel.cc    | 106 ----
 ..._triu_grad_kernel.h => tril_grad_kernel.h} |  10 +-
 .../{tril_triu_kernel.h => tril_kernel.h}     |  22 +-
 ...kernel.h => uniform_inplace_grad_kernel.h} |  18 +-
 ...grad_kernel.h => uniform_inplace_kernel.h} |  18 +-
 ...orm_random_kernel.cc => uniform_kernel.cc} |  30 +-
 ...iform_random_kernel.h => uniform_kernel.h} |  34 +-
 ...here_index_kernel.cc => nonzero_kernel.cc} |  10 +-
 .../{reduce_prod_kernel.cc => prod_kernel.cc} |   2 +-
 ...riu_grad_kernel.cc => tril_grad_kernel.cc} |  14 +-
 .../{tril_triu_kernel.cc => tril_kernel.cc}   |  15 +-
 ...orm_random_kernel.cc => uniform_kernel.cc} |  26 +-
 paddle/phi/kernels/yolo_loss_grad_kernel.h    |  42 ++
 paddle/phi/kernels/yolo_loss_kernel.h         |  38 ++
 paddle/phi/kernels/yolov3_loss_grad_kernel.h  |  42 --
 paddle/phi/kernels/yolov3_loss_kernel.h       |  38 --
 .../ops/compat/hierarchical_sigmoid_sig.cc    |   9 +-
 paddle/phi/ops/compat/tril_triu_sig.cc        |   7 +-
 .../ops/compat/uniform_random_inplace_sig.cc  |   6 +-
 paddle/phi/ops/compat/uniform_random_sig.cc   |  30 +-
 paddle/phi/ops/compat/where_index_sig.cc      |  27 +
 paddle/phi/ops/compat/yolov3_loss_sig.cc      |   7 +-
 python/paddle/fluid/initializer.py            |   6 +-
 python/paddle/fluid/layers/nn.py              |   6 +-
 python/paddle/nn/functional/loss.py           |   2 +-
 python/paddle/tensor/creation.py              |   4 +-
 python/paddle/tensor/math.py                  |   2 +-
 python/paddle/tensor/random.py                |   4 +-
 python/paddle/tensor/search.py                |   2 +-
 python/paddle/vision/ops.py                   |   2 +-
 88 files changed, 1437 insertions(+), 1421 deletions(-)
 delete mode 100644 paddle/phi/kernels/cpu/hierarchical_sigmoid_grad_kernel.cc
 rename paddle/phi/kernels/cpu/{hierarchical_sigmoid_grad.h => hsigmoid_loss_grad.h} (71%)
 create mode 100644 paddle/phi/kernels/cpu/hsigmoid_loss_grad_kernel.cc
 rename paddle/phi/kernels/cpu/{hierarchical_sigmoid_kernel.cc => hsigmoid_loss_kernel.cc} (72%)
 rename paddle/phi/kernels/cpu/{where_index_kernel.cc => nonzero_kernel.cc} (90%)
 rename paddle/phi/kernels/cpu/{reduce_prod_grad_kernel.cc => prod_grad_kernel.cc} (84%)
 rename paddle/phi/kernels/cpu/{reduce_prod_kernel.cc => prod_kernel.cc} (96%)
 rename paddle/phi/kernels/cpu/{tril_triu_kernel.cc => tril_grad_kernel.cc} (88%)
 rename paddle/phi/kernels/cpu/{tril_triu_grad_kernel.cc => tril_kernel.cc} (86%)
 rename paddle/phi/kernels/cpu/{uniform_random_inplace_grad_kernel.cc => uniform_inplace_grad_kernel.cc} (59%)
 rename paddle/phi/kernels/cpu/{uniform_random_inplace_kernel.cc => uniform_inplace_kernel.cc} (68%)
 rename paddle/phi/kernels/cpu/{uniform_random_kernel.cc => uniform_kernel.cc} (76%)
 rename paddle/phi/kernels/cpu/{yolov3_loss_functor.h => yolo_loss_functor.h} (100%)
 rename paddle/phi/kernels/cpu/{yolov3_loss_grad_kernel.cc => yolo_loss_grad_kernel.cc} (85%)
 rename paddle/phi/kernels/cpu/{yolov3_loss_kernel.cc => yolo_loss_kernel.cc} (93%)
 rename paddle/phi/kernels/gpu/{where_index_kernel.cu => nonzero_kernel.cu} (90%)
 rename paddle/phi/kernels/gpu/{reduce_prod_grad_kernel.cu => prod_grad_kernel.cu} (84%)
 rename paddle/phi/kernels/gpu/{tril_triu_kernel.cu => tril_grad_kernel.cu} (88%)
 rename paddle/phi/kernels/gpu/{tril_triu_grad_kernel.cu => tril_kernel.cu} (86%)
 rename paddle/phi/kernels/gpu/{uniform_random_inplace_grad_kernel.cu => uniform_inplace_grad_kernel.cu} (61%)
 rename paddle/phi/kernels/gpu/{uniform_random_inplace_kernel.cu => uniform_inplace_kernel.cu} (79%)
 rename paddle/phi/kernels/gpu/{uniform_random_kernel.cu => uniform_kernel.cu} (81%)
 delete mode 100644 paddle/phi/kernels/hierarchical_sigmoid_grad_kernel.h
 delete mode 100644 paddle/phi/kernels/hierarchical_sigmoid_kernel.h
 create mode 100644 paddle/phi/kernels/hsigmoid_loss_grad_kernel.h
 create mode 100644 paddle/phi/kernels/hsigmoid_loss_kernel.h
 rename paddle/phi/kernels/impl/{reduce_prod_grad_kernel_impl.h => prod_grad_kernel_impl.h} (69%)
 rename paddle/phi/kernels/impl/{tril_triu_grad_kernel_impl.h => tril_grad_kernel_impl.h} (82%)
 rename paddle/phi/kernels/impl/{tril_triu_kernel_impl.h => tril_kernel_impl.h} (83%)
 rename paddle/phi/kernels/kps/{reduce_prod_kernel.cu => prod_kernel.cu} (96%)
 rename paddle/phi/kernels/{where_index_kernel.h => nonzero_kernel.h} (84%)
 rename paddle/phi/kernels/{reduce_prod_grad_kernel.h => prod_grad_kernel.h} (68%)
 rename paddle/phi/kernels/{reduce_prod_kernel.cc => prod_kernel.cc} (96%)
 rename paddle/phi/kernels/{reduce_prod_kernel.h => prod_kernel.h} (100%)
 delete mode 100644 paddle/phi/kernels/selected_rows/hierarchical_sigmoid_grad_kernel.cc
 delete mode 100644 paddle/phi/kernels/selected_rows/hierarchical_sigmoid_grad_kernel.h
 create mode 100644 paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h
 create mode 100644 paddle/phi/kernels/selected_rows/uniform_kernel.cc
 rename paddle/phi/kernels/selected_rows/{uniform_random_kernel.h => uniform_kernel.h} (54%)
 delete mode 100644 paddle/phi/kernels/selected_rows/uniform_random_kernel.cc
 rename paddle/phi/kernels/{tril_triu_grad_kernel.h => tril_grad_kernel.h} (77%)
 rename paddle/phi/kernels/{tril_triu_kernel.h => tril_kernel.h} (66%)
 rename paddle/phi/kernels/{uniform_random_inplace_kernel.h => uniform_inplace_grad_kernel.h} (63%)
 rename paddle/phi/kernels/{uniform_random_inplace_grad_kernel.h => uniform_inplace_kernel.h} (60%)
 rename paddle/phi/kernels/{uniform_random_kernel.cc => uniform_kernel.cc} (68%)
 rename paddle/phi/kernels/{uniform_random_kernel.h => uniform_kernel.h} (54%)
 rename paddle/phi/kernels/xpu/{where_index_kernel.cc => nonzero_kernel.cc} (89%)
 rename paddle/phi/kernels/xpu/{reduce_prod_kernel.cc => prod_kernel.cc} (96%)
 rename paddle/phi/kernels/xpu/{tril_triu_grad_kernel.cc => tril_grad_kernel.cc} (81%)
 rename paddle/phi/kernels/xpu/{tril_triu_kernel.cc => tril_kernel.cc} (82%)
 rename paddle/phi/kernels/xpu/{uniform_random_kernel.cc => uniform_kernel.cc} (77%)
 create mode 100644 paddle/phi/kernels/yolo_loss_grad_kernel.h
 create mode 100644 paddle/phi/kernels/yolo_loss_kernel.h
 delete mode 100644 paddle/phi/kernels/yolov3_loss_grad_kernel.h
 delete mode 100644 paddle/phi/kernels/yolov3_loss_kernel.h
 create mode 100644 paddle/phi/ops/compat/where_index_sig.cc

diff --git a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc
index f625f133ed..207ca7cf08 100644
--- a/paddle/fluid/framework/new_executor/standalone_executor_test.cc
+++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc
@@ -63,8 +63,8 @@ USE_OP_ITSELF(memcpy_d2h);
 USE_OP_ITSELF(fetch_v2);
 
 PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT);
-PD_DECLARE_KERNEL(uniform_random_raw, GPU, ALL_LAYOUT);
-PD_DECLARE_KERNEL(uniform_random, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(uniform_raw, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(uniform, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(transpose, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(reshape, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(split, GPU, ALL_LAYOUT);
diff --git a/paddle/fluid/operators/detection/yolov3_loss_op.cc b/paddle/fluid/operators/detection/yolov3_loss_op.cc
index 5f6ffece3b..0b8fc79826 100644
--- a/paddle/fluid/operators/detection/yolov3_loss_op.cc
+++ b/paddle/fluid/operators/detection/yolov3_loss_op.cc
@@ -218,10 +218,10 @@ class Yolov3LossGradMaker : public framework::SingleGradOpMaker<T> {
 namespace ops = paddle::operators;
 DECLARE_INFER_SHAPE_FUNCTOR(yolov3_loss,
                             Yolov3LossInferShapeFunctor,
-                            PD_INFER_META(phi::Yolov3LossInferMeta));
+                            PD_INFER_META(phi::YoloLossInferMeta));
 DECLARE_INFER_SHAPE_FUNCTOR(yolov3_loss_grad,
                             Yolov3LossGradInferShapeFunctor,
-                            PD_INFER_META(phi::Yolov3LossGradInferMeta));
+                            PD_INFER_META(phi::YoloLossGradInferMeta));
 REGISTER_OPERATOR(yolov3_loss,
                   ops::Yolov3LossOp,
                   ops::Yolov3LossOpMaker,
diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.cc b/paddle/fluid/operators/hierarchical_sigmoid_op.cc
index 338b8af503..8193be6b6b 100644
--- a/paddle/fluid/operators/hierarchical_sigmoid_op.cc
+++ b/paddle/fluid/operators/hierarchical_sigmoid_op.cc
@@ -259,7 +259,7 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(
 namespace ops = paddle::operators;
 DECLARE_INFER_SHAPE_FUNCTOR(hierarchical_sigmoid,
                             HierarchicalSigmoidInferShapeFunctor,
-                            PD_INFER_META(phi::HierarchicalSigmoidInferMeta));
+                            PD_INFER_META(phi::HSigmoidLossInferMeta));
 REGISTER_OPERATOR(hierarchical_sigmoid,
                   ops::HierarchicalSigmoidOp,
                   ops::HierarchicalSigmoidOpMaker<int>,
diff --git a/paddle/fluid/operators/tril_triu_op.cc b/paddle/fluid/operators/tril_triu_op.cc
index 5d2c3c0797..97c9289295 100644
--- a/paddle/fluid/operators/tril_triu_op.cc
+++ b/paddle/fluid/operators/tril_triu_op.cc
@@ -93,7 +93,7 @@ namespace ops = paddle::operators;
 namespace plat = paddle::platform;
 DECLARE_INFER_SHAPE_FUNCTOR(tril_triu,
                             TrilTriuInferShapeFunctor,
-                            PD_INFER_META(phi::TrilTriuInferMeta));
+                            PD_INFER_META(phi::TrilInferMeta));
 REGISTER_OPERATOR(tril_triu,
                   ops::TrilTriuOp,
                   ops::TrilTriuOpMaker,
diff --git a/paddle/fluid/operators/where_index_op.cc b/paddle/fluid/operators/where_index_op.cc
index 7e5cc8fa53..52448b08c5 100644
--- a/paddle/fluid/operators/where_index_op.cc
+++ b/paddle/fluid/operators/where_index_op.cc
@@ -48,7 +48,7 @@ class WhereIndexOpMaker : public framework::OpProtoAndCheckerMaker {
 namespace ops = paddle::operators;
 DECLARE_INFER_SHAPE_FUNCTOR(where_index,
                             WhereIndexInferShapeFunctor,
-                            PD_INFER_META(phi::WhereIndexInferMeta));
+                            PD_INFER_META(phi::NonZeroInferMeta));
 REGISTER_OPERATOR(
     where_index,
     ops::WhereIndexOp,
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 922cb70d6e..4e9a4abfcd 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -791,8 +791,8 @@
     func : hard_tanh_grad
   inplace : (out_grad -> x_grad)
 
-- backward_op : hierarchical_sigmoid_grad
-  forward : hierarchical_sigmoid (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, int num_classes, bool remote_prefetch, int trainer_id, int64_t[] height_sections, str[] epmap, str[] table_names, bool is_sparse) -> Tensor(out), Tensor(pre_out), Tensor(w_out)
+- backward_op : hsigmoid_loss_grad
+  forward : hsigmoid_loss (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, int num_classes, bool remote_prefetch, int trainer_id, int64_t[] height_sections, str[] epmap, str[] table_names, bool is_sparse) -> Tensor(out), Tensor(pre_out), Tensor(w_out)
   args : (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, Tensor pre_out, Tensor out_grad, int num_classes, bool remote_prefetch, int trainer_id, int64_t[] height_sections, str[] epmap, str[] table_names, bool is_sparse)
   output : Tensor(x_grad), Tensor(w_grad), Tensor(bias_grad)
   infer_meta :
@@ -800,7 +800,7 @@
     param : [x ,w, bias]
   optional: path, code, bias
   kernel :
-    func : hierarchical_sigmoid_grad
+    func : hsigmoid_loss_grad
 
 - backward_op : huber_loss_grad
   forward : huber_loss (Tensor input, Tensor label, float delta) -> Tensor(out), Tensor(residual)
@@ -1477,6 +1477,16 @@
   kernel :
     func : prelu_grad
 
+- backward_op : prod_grad
+  forward : prod (Tensor x, IntArray dims, bool keep_dim, bool reduce_all) -> Tensor(out)
+  args : (Tensor x, Tensor out, Tensor out_grad, IntArray dims,  bool keep_dim, bool reduce_all)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : prod_grad
+
 - backward_op : psroi_pool_grad
   forward : psroi_pool (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, int output_channels, float spatial_scale) -> Tensor(out)
   args : (Tensor x, Tensor boxes, Tensor boxes_num, Tensor out_grad, int pooled_height, int pooled_width, int output_channels, float spatial_scale)
@@ -1516,16 +1526,6 @@
   output : Tensor(x_grad)
   invoke : real_grad_impl(out_grad, x_grad)
 
-- backward_op : reduce_prod_grad
-  forward : reduce_prod (Tensor x, IntArray dims, bool keep_dim, bool reduce_all) -> Tensor(out)
-  args : (Tensor x, Tensor out, Tensor out_grad, IntArray dims,  bool keep_dim, bool reduce_all)
-  output : Tensor(x_grad)
-  infer_meta :
-    func : UnchangedInferMeta
-    param : [x]
-  kernel :
-    func : prod_grad
-
 - backward_op : relu6_grad
   forward : relu6 (Tensor x, float threshold) -> Tensor(out)
   args : (Tensor out, Tensor out_grad, float threshold)
@@ -2234,15 +2234,15 @@
   kernel :
     func : triangular_solve_grad
 
-- backward_op : tril_triu_grad
-  forward : tril_triu(Tensor x,  int diagonal,  bool lower) -> Tensor(out)
+- backward_op : tril_grad
+  forward : tril(Tensor x,  int diagonal,  bool lower) -> Tensor(out)
   args : (Tensor out_grad,  int diagonal,  bool lower)
   output : Tensor(x_grad)
   infer_meta :
     func : UnchangedInferMeta
     param : [out_grad]
   kernel :
-    func : tril_triu_grad
+    func : tril_grad
 
 - backward_op : trilinear_interp_grad
   forward : trilinear_interp (Tensor x, Tensor out_size, Tensor[] size_tensor, Tensor scale_tensor, str data_layout, int out_d, int out_h, int out_w, float[] scale, str interp_method, bool align_corners, int align_mode) -> Tensor(output)
@@ -2273,14 +2273,14 @@
     func : unfold_grad
   no_need_buffer : x
 
-- backward_op : uniform_random_inplace_grad
-  forward : uniform_random_inplace(Tensor x, float min, float max, int seed, int diag_num, int diag_step, float diag_val) -> Tensor(out)
+- backward_op : uniform_inplace_grad
+  forward : uniform_inplace(Tensor x, float min, float max, int seed, int diag_num, int diag_step, float diag_val) -> Tensor(out)
   args : (Tensor out_grad, float min, float max, int seed, int diag_num, int diag_step, float diag_val)
   output : Tensor(x_grad)
   infer_meta :
     func : UniformRandomInplaceGradInferMeta
   kernel :
-    func : uniform_random_inplace_grad
+    func : uniform_inplace_grad
   inplace : (out_grad -> x_grad)
 
 - backward_op : unsqueeze_double_grad
@@ -2335,14 +2335,14 @@
     func : where_grad
   no_need_buffer : x, y
 
-- backward_op : yolov3_loss_grad
-  forward : yolov3_loss(Tensor x, Tensor gt_box, Tensor gt_label, Tensor gt_score, int[] anchors, int[] anchor_mask, int class_num, float ignore_thresh, int downsample_ratio, bool use_label_smooth=true, float scale_x_y=1.0) -> Tensor(loss), Tensor(objectness_mask), Tensor(gt_match_mask)
+- backward_op : yolo_loss_grad
+  forward : yolo_loss(Tensor x, Tensor gt_box, Tensor gt_label, Tensor gt_score, int[] anchors, int[] anchor_mask, int class_num, float ignore_thresh, int downsample_ratio, bool use_label_smooth=true, float scale_x_y=1.0) -> Tensor(loss), Tensor(objectness_mask), Tensor(gt_match_mask)
   args : (Tensor x, Tensor gt_box, Tensor gt_label, Tensor gt_score, Tensor objectness_mask, Tensor gt_match_mask, Tensor loss_grad, int[] anchors, int[] anchor_mask, int class_num, float ignore_thresh, int downsample_ratio, bool use_label_smooth=true, float scale_x_y=1.0)
   output : Tensor(x_grad), Tensor(gt_box_grad), Tensor(gt_label_grad), Tensor(gt_score_grad)
   infer_meta :
-    func : Yolov3LossGradInferMeta
+    func : YoloLossGradInferMeta
   kernel :
-    func : yolov3_loss_grad
+    func : yolo_loss_grad
   optional : gt_score
 
 - backward_op: fold_grad
diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml
index c42bc74461..03dbb08b59 100755
--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -1036,17 +1036,6 @@
     func : hard_tanh
   backward : hardtanh_grad
 
-- op : hierarchical_sigmoid
-  args : (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, int num_classes, bool remote_prefetch, int trainer_id, int64_t[] height_sections, str[] epmap, str[] table_names, bool is_sparse)
-  output : Tensor(out), Tensor(pre_out), Tensor(w_out)
-  infer_meta :
-    func : HierarchicalSigmoidInferMeta
-  optional: path, code, bias
-  kernel :
-    func : hierarchical_sigmoid
-    data_type : x
-  backward : hierarchical_sigmoid_grad
-
 - op : histogram
   args : (Tensor input, int64_t bins, int min, int max)
   output : Tensor(out)
@@ -1055,6 +1044,17 @@
   kernel :
     func : histogram
 
+- op : hsigmoid_loss
+  args : (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, int num_classes, bool remote_prefetch, int trainer_id, int64_t[] height_sections, str[] epmap, str[] table_names, bool is_sparse)
+  output : Tensor(out), Tensor(pre_out), Tensor(w_out)
+  infer_meta :
+    func : HSigmoidLossInferMeta
+  optional: path, code, bias
+  kernel :
+    func : hsigmoid_loss
+    data_type : x
+  backward : hsigmoid_loss_grad
+
 - op : huber_loss
   args : (Tensor input, Tensor label, float delta)
   output : Tensor(out), Tensor(residual)
@@ -1696,6 +1696,14 @@
     func : nms
     data_type : x
 
+- op : nonzero
+  args : (Tensor condition)
+  output : Tensor(out)
+  infer_meta :
+    func : NonZeroInferMeta
+  kernel :
+    func : nonzero
+
 - op : norm
   args : (Tensor x, int axis, float epsilon, bool is_test)
   output : Tensor(out), Tensor(norm)
@@ -1828,6 +1836,15 @@
   kernel :
     func : prior_box
 
+- op : prod
+  args : (Tensor x, IntArray dims, bool keep_dim, bool reduce_all)
+  output : Tensor
+  infer_meta :
+    func : ReduceIntArrayAxisInferMetaBase
+  kernel :
+    func : prod_raw
+  backward : prod_grad
+
 - op : psroi_pool
   args : (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, int output_channels, float spatial_scale)
   output : Tensor
@@ -1893,15 +1910,6 @@
     func : real
   backward : real_grad
 
-- op : reduce_prod
-  args : (Tensor x, IntArray dims, bool keep_dim, bool reduce_all)
-  output : Tensor
-  infer_meta :
-    func : ReduceIntArrayAxisInferMetaBase
-  kernel :
-    func : prod_raw
-  backward : reduce_prod_grad
-
 - op : relu
   args : (Tensor x)
   output : Tensor(out)
@@ -2460,6 +2468,15 @@
     func : triangular_solve
   backward : triangular_solve_grad
 
+- op : tril
+  args : (Tensor x,  int diagonal,  bool lower)
+  output : Tensor(out)
+  infer_meta :
+    func : TrilInferMeta
+  kernel :
+    func : tril
+  backward : tril_grad
+
 - op : tril_indices
   args : (int rows, int cols, int offset, DataType dtype, Place place={})
   output : Tensor(out)
@@ -2472,15 +2489,6 @@
     data_type : dtype
     backend : place
 
-- op : tril_triu
-  args : (Tensor x,  int diagonal,  bool lower)
-  output : Tensor(out)
-  infer_meta :
-    func : TrilTriuInferMeta
-  kernel :
-    func : tril_triu
-  backward : tril_triu_grad
-
 - op : trilinear_interp
   args : (Tensor x, Tensor out_size, Tensor[] size_tensor, Tensor scale_tensor, str data_layout, int out_d, int out_h, int out_w, float[] scale, str interp_method, bool align_corners, int align_mode)
   output : Tensor(output)
@@ -2535,14 +2543,14 @@
     func : unfold
   backward : unfold_grad
 
-- op : uniform_random
+- op : uniform
   args : (IntArray shape,  DataType dtype,  Scalar min,  Scalar max,  int seed, Place place={})
   output : Tensor(out)
   infer_meta :
     func : UniformRandomInferMeta
     param: [shape, dtype]
   kernel :
-    func : uniform_random
+    func : uniform
     param: [shape, dtype, min, max, seed]
     data_type : dtype
     backend : place
@@ -2628,14 +2636,6 @@
     func : where
   backward : where_grad
 
-- op : where_index
-  args : (Tensor condition)
-  output : Tensor(out)
-  infer_meta :
-    func : WhereIndexInferMeta
-  kernel :
-    func : where_index
-
 - op : yolo_box
   args : (Tensor x, Tensor img_size, int[] anchors, int class_num, float conf_thresh, int downsample_ratio, bool clip_bbox, float scale_x_y=1.0, bool iou_aware=false, float iou_aware_factor=0.5)
   output : Tensor(boxes), Tensor(scores)
@@ -2645,16 +2645,16 @@
     func : yolo_box
     data_type : x
 
-- op : yolov3_loss
+- op : yolo_loss
   args : (Tensor x, Tensor gt_box, Tensor gt_label, Tensor gt_score, int[] anchors, int[] anchor_mask, int class_num, float ignore_thresh, int downsample_ratio, bool use_label_smooth=true, float scale_x_y=1.0)
   output : Tensor(loss), Tensor(objectness_mask), Tensor(gt_match_mask)
   infer_meta :
-    func : Yolov3LossInferMeta
+    func : YoloLossInferMeta
   kernel :
-    func : yolov3_loss
+    func : yolo_loss
     data_type : x
   optional : gt_score
-  backward : yolov3_loss_grad
+  backward : yolo_loss_grad
 
 - op : zeros
   args : (IntArray shape, DataType dtype=DataType::FLOAT32, Place place=CPUPlace())
@@ -2734,16 +2734,16 @@
   intermediate : reserve
   view : (dropout_state_in -> dropout_state_out)
 
-- op: uniform_random_inplace
+- op: uniform_inplace
   args: (Tensor x, float min, float max, int seed, int diag_num, int diag_step, float diag_val)
   output: Tensor(out)
   infer_meta:
     func: UniformRandomInplaceInferMeta
   kernel:
-    func: uniform_random_inplace
+    func: uniform_inplace
     data_type: x
   inplace: (x -> out)
-  backward: uniform_random_inplace_grad
+  backward: uniform_inplace_grad
 
 - op: unpool
   args: (Tensor x, Tensor indices, int[] ksize, int[] strides, int[] padding, IntArray output_size, str data_format)
diff --git a/paddle/phi/infermeta/backward.cc b/paddle/phi/infermeta/backward.cc
index 6f8a60c923..ef2f384cb6 100644
--- a/paddle/phi/infermeta/backward.cc
+++ b/paddle/phi/infermeta/backward.cc
@@ -987,24 +987,24 @@ void UnStackGradInferMeta(const std::vector<const MetaTensor*>& out_grad,
   x_grad->set_dtype(out_grad[0]->dtype());
 }
 
-void Yolov3LossGradInferMeta(const MetaTensor& x,
-                             const MetaTensor& gt_box,
-                             const MetaTensor& gt_label,
-                             const MetaTensor& gt_score,
-                             const MetaTensor& objectness_mask,
-                             const MetaTensor& gt_match_mask,
-                             const MetaTensor& loss_grad,
-                             const std::vector<int>& anchors,
-                             const std::vector<int>& anchor_mask,
-                             int class_num,
-                             float ignore_thresh,
-                             int downsample_ratio,
-                             bool use_label_smooth,
-                             float scale_x_y,
-                             MetaTensor* x_grad,
-                             MetaTensor* gt_box_grad,
-                             MetaTensor* gt_label_grad,
-                             MetaTensor* gt_score_grad) {
+void YoloLossGradInferMeta(const MetaTensor& x,
+                           const MetaTensor& gt_box,
+                           const MetaTensor& gt_label,
+                           const MetaTensor& gt_score,
+                           const MetaTensor& objectness_mask,
+                           const MetaTensor& gt_match_mask,
+                           const MetaTensor& loss_grad,
+                           const std::vector<int>& anchors,
+                           const std::vector<int>& anchor_mask,
+                           int class_num,
+                           float ignore_thresh,
+                           int downsample_ratio,
+                           bool use_label_smooth,
+                           float scale_x_y,
+                           MetaTensor* x_grad,
+                           MetaTensor* gt_box_grad,
+                           MetaTensor* gt_label_grad,
+                           MetaTensor* gt_score_grad) {
   if (x_grad) {
     x_grad->set_dims(x.dims());
     x_grad->set_dtype(x.dtype());
diff --git a/paddle/phi/infermeta/backward.h b/paddle/phi/infermeta/backward.h
index dd86055978..01cdc8023a 100644
--- a/paddle/phi/infermeta/backward.h
+++ b/paddle/phi/infermeta/backward.h
@@ -385,24 +385,24 @@ void UnStackGradInferMeta(const std::vector<const MetaTensor*>& out_grad,
                           int axis,
                           MetaTensor* x_grad);
 
-void Yolov3LossGradInferMeta(const MetaTensor& x,
-                             const MetaTensor& gt_box,
-                             const MetaTensor& gt_label,
-                             const MetaTensor& gt_score,
-                             const MetaTensor& objectness_mask,
-                             const MetaTensor& gt_match_mask,
-                             const MetaTensor& loss_grad,
-                             const std::vector<int>& anchors,
-                             const std::vector<int>& anchor_mask,
-                             int class_num,
-                             float ignore_thresh,
-                             int downsample_ratio,
-                             bool use_label_smooth,
-                             float scale_x_y,
-                             MetaTensor* x_grad,
-                             MetaTensor* gt_box_grad,
-                             MetaTensor* gt_label_grad,
-                             MetaTensor* gt_score_grad);
+void YoloLossGradInferMeta(const MetaTensor& x,
+                           const MetaTensor& gt_box,
+                           const MetaTensor& gt_label,
+                           const MetaTensor& gt_score,
+                           const MetaTensor& objectness_mask,
+                           const MetaTensor& gt_match_mask,
+                           const MetaTensor& loss_grad,
+                           const std::vector<int>& anchors,
+                           const std::vector<int>& anchor_mask,
+                           int class_num,
+                           float ignore_thresh,
+                           int downsample_ratio,
+                           bool use_label_smooth,
+                           float scale_x_y,
+                           MetaTensor* x_grad,
+                           MetaTensor* gt_box_grad,
+                           MetaTensor* gt_label_grad,
+                           MetaTensor* gt_score_grad);
 
 void IndexAddGradInferMeta(const MetaTensor& index,
                            const MetaTensor& add_value,
diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc
index 91d2642139..52050f160e 100644
--- a/paddle/phi/infermeta/multiary.cc
+++ b/paddle/phi/infermeta/multiary.cc
@@ -1328,22 +1328,22 @@ void GraphSampleNeighborsInferMeta(const MetaTensor& row,
   out_count->set_dtype(DataType::INT32);
 }
 
-void HierarchicalSigmoidInferMeta(const MetaTensor& x,
-                                  const MetaTensor& w,
-                                  const MetaTensor& label,
-                                  const MetaTensor& path,
-                                  const MetaTensor& code,
-                                  const MetaTensor& bias,
-                                  int num_classes,
-                                  bool remote_prefetch,
-                                  int trainer_id,
-                                  const std::vector<int64_t>& height_sections,
-                                  const std::vector<std::string>& epmap,
-                                  const std::vector<std::string>& table_names,
-                                  bool is_sparse,
-                                  MetaTensor* out,
-                                  MetaTensor* pre_out,
-                                  MetaTensor* w_out) {
+void HSigmoidLossInferMeta(const MetaTensor& x,
+                           const MetaTensor& w,
+                           const MetaTensor& label,
+                           const MetaTensor& path,
+                           const MetaTensor& code,
+                           const MetaTensor& bias,
+                           int num_classes,
+                           bool remote_prefetch,
+                           int trainer_id,
+                           const std::vector<int64_t>& height_sections,
+                           const std::vector<std::string>& epmap,
+                           const std::vector<std::string>& table_names,
+                           bool is_sparse,
+                           MetaTensor* out,
+                           MetaTensor* pre_out,
+                           MetaTensor* w_out) {
   const int64_t input_dims = x.dims()[0];
   const int64_t label_dims = label.dims()[0];
   PADDLE_ENFORCE_EQ(input_dims,
@@ -2762,20 +2762,20 @@ void WhereInferMeta(const MetaTensor& condition,
   out->share_meta(x);
 }
 
-void Yolov3LossInferMeta(const MetaTensor& x,
-                         const MetaTensor& gt_box,
-                         const MetaTensor& gt_label,
-                         const MetaTensor& gt_score,
-                         const std::vector<int>& anchors,
-                         const std::vector<int>& anchor_mask,
-                         int class_num,
-                         float ignore_thresh,
-                         int downsample_ratio,
-                         bool use_label_smooth,
-                         float scale_x_y,
-                         MetaTensor* loss,
-                         MetaTensor* objectness_mask,
-                         MetaTensor* gt_match_mask) {
+void YoloLossInferMeta(const MetaTensor& x,
+                       const MetaTensor& gt_box,
+                       const MetaTensor& gt_label,
+                       const MetaTensor& gt_score,
+                       const std::vector<int>& anchors,
+                       const std::vector<int>& anchor_mask,
+                       int class_num,
+                       float ignore_thresh,
+                       int downsample_ratio,
+                       bool use_label_smooth,
+                       float scale_x_y,
+                       MetaTensor* loss,
+                       MetaTensor* objectness_mask,
+                       MetaTensor* gt_match_mask) {
   auto dim_x = x.dims();
   auto dim_gtbox = gt_box.dims();
   auto dim_gtlabel = gt_label.dims();
diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h
index 0dfb1307c0..79926e06b2 100644
--- a/paddle/phi/infermeta/multiary.h
+++ b/paddle/phi/infermeta/multiary.h
@@ -288,22 +288,22 @@ void GraphSampleNeighborsInferMeta(const MetaTensor& row,
                                    MetaTensor* out_count,
                                    MetaTensor* out_eids);
 
-void HierarchicalSigmoidInferMeta(const MetaTensor& x,
-                                  const MetaTensor& w,
-                                  const MetaTensor& label,
-                                  const MetaTensor& path,
-                                  const MetaTensor& code,
-                                  const MetaTensor& bias,
-                                  int num_classes,
-                                  bool remote_prefetch,
-                                  int trainer_id,
-                                  const std::vector<int64_t>& height_sections,
-                                  const std::vector<std::string>& epmap,
-                                  const std::vector<std::string>& table_names,
-                                  bool is_sparse,
-                                  MetaTensor* out,
-                                  MetaTensor* pre_out,
-                                  MetaTensor* w_out);
+void HSigmoidLossInferMeta(const MetaTensor& x,
+                           const MetaTensor& w,
+                           const MetaTensor& label,
+                           const MetaTensor& path,
+                           const MetaTensor& code,
+                           const MetaTensor& bias,
+                           int num_classes,
+                           bool remote_prefetch,
+                           int trainer_id,
+                           const std::vector<int64_t>& height_sections,
+                           const std::vector<std::string>& epmap,
+                           const std::vector<std::string>& table_names,
+                           bool is_sparse,
+                           MetaTensor* out,
+                           MetaTensor* pre_out,
+                           MetaTensor* w_out);
 
 void InterpolateInferMeta(
     const MetaTensor& x,
@@ -508,19 +508,19 @@ void WhereInferMeta(const MetaTensor& condition,
                     const MetaTensor& y,
                     MetaTensor* out);
 
-void Yolov3LossInferMeta(const MetaTensor& x,
-                         const MetaTensor& gt_box,
-                         const MetaTensor& gt_label,
-                         const MetaTensor& gt_score,
-                         const std::vector<int>& anchors,
-                         const std::vector<int>& anchor_mask,
-                         int class_num,
-                         float ignore_thresh,
-                         int downsample_ratio,
-                         bool use_label_smooth,
-                         float scale_x_y,
-                         MetaTensor* loss,
-                         MetaTensor* objectness_mask,
-                         MetaTensor* gt_match_mask);
+void YoloLossInferMeta(const MetaTensor& x,
+                       const MetaTensor& gt_box,
+                       const MetaTensor& gt_label,
+                       const MetaTensor& gt_score,
+                       const std::vector<int>& anchors,
+                       const std::vector<int>& anchor_mask,
+                       int class_num,
+                       float ignore_thresh,
+                       int downsample_ratio,
+                       bool use_label_smooth,
+                       float scale_x_y,
+                       MetaTensor* loss,
+                       MetaTensor* objectness_mask,
+                       MetaTensor* gt_match_mask);
 
 }  // namespace phi
diff --git a/paddle/phi/infermeta/ternary.cc b/paddle/phi/infermeta/ternary.cc
index 874432aedd..c831070735 100644
--- a/paddle/phi/infermeta/ternary.cc
+++ b/paddle/phi/infermeta/ternary.cc
@@ -402,64 +402,6 @@ void InstanceNormInferMeta(const MetaTensor& x,
   }
 }
 
-void SendURecvInferMeta(const MetaTensor& x,
-                        const MetaTensor& src_index,
-                        const MetaTensor& dst_index,
-                        const std::string& reduce_op,
-                        const IntArray& out_size,
-                        MetaTensor* out,
-                        MetaTensor* dst_count) {
-  auto src_index_dims = src_index.dims();
-  if (src_index_dims.size() == 2) {
-    PADDLE_ENFORCE_EQ(src_index_dims[1],
-                      1,
-                      phi::errors::InvalidArgument(
-                          "The last dim of Src_index should be 1 when it "
-                          "is 2D, but we get %d",
-                          src_index_dims[1]));
-  } else {
-    PADDLE_ENFORCE_EQ(
-        src_index_dims.size(),
-        1,
-        phi::errors::InvalidArgument(
-            "The Src_index should be 1D, when it is not 2D, but we get %d",
-            src_index_dims.size()));
-  }
-
-  auto dst_index_dims = dst_index.dims();
-  if (dst_index_dims.size() == 2) {
-    PADDLE_ENFORCE_EQ(dst_index_dims[1],
-                      1,
-                      phi::errors::InvalidArgument(
-                          "The last dim of Dst_index should be 1 when it "
-                          "is 2D, but we get %d",
-                          dst_index_dims[1]));
-  } else {
-    PADDLE_ENFORCE_EQ(
-        dst_index_dims.size(),
-        1,
-        phi::errors::InvalidArgument("The Dst_index should be 1D, "
-                                     "when it is not 2D, but we get %d",
-                                     dst_index_dims.size()));
-  }
-
-  PADDLE_ENFORCE_EQ(src_index_dims[0],
-                    dst_index_dims[0],
-                    phi::errors::InvalidArgument(
-                        "Src_index and Dst_index should have the same shape."));
-
-  auto dims = x.dims();
-  std::vector<int64_t> dims_ = phi::vectorize(dims);
-  dims_[0] = -1;
-  out->set_dims(phi::make_ddim(dims_));
-  out->set_dtype(x.dtype());
-
-  if (reduce_op == "MEAN") {
-    dst_count->set_dims({-1});
-    dst_count->set_dtype(DataType::INT32);
-  }
-}
-
 void GroupNormInferMeta(const MetaTensor& x,
                         const MetaTensor& scale,
                         const MetaTensor& bias,
@@ -1164,6 +1106,64 @@ void ScatterNdAddInferMeta(const MetaTensor& x,
   out->set_dtype(x.dtype());
 }
 
+void SendURecvInferMeta(const MetaTensor& x,
+                        const MetaTensor& src_index,
+                        const MetaTensor& dst_index,
+                        const std::string& reduce_op,
+                        const IntArray& out_size,
+                        MetaTensor* out,
+                        MetaTensor* dst_count) {
+  auto src_index_dims = src_index.dims();
+  if (src_index_dims.size() == 2) {
+    PADDLE_ENFORCE_EQ(src_index_dims[1],
+                      1,
+                      phi::errors::InvalidArgument(
+                          "The last dim of Src_index should be 1 when it "
+                          "is 2D, but we get %d",
+                          src_index_dims[1]));
+  } else {
+    PADDLE_ENFORCE_EQ(
+        src_index_dims.size(),
+        1,
+        phi::errors::InvalidArgument(
+            "The Src_index should be 1D, when it is not 2D, but we get %d",
+            src_index_dims.size()));
+  }
+
+  auto dst_index_dims = dst_index.dims();
+  if (dst_index_dims.size() == 2) {
+    PADDLE_ENFORCE_EQ(dst_index_dims[1],
+                      1,
+                      phi::errors::InvalidArgument(
+                          "The last dim of Dst_index should be 1 when it "
+                          "is 2D, but we get %d",
+                          dst_index_dims[1]));
+  } else {
+    PADDLE_ENFORCE_EQ(
+        dst_index_dims.size(),
+        1,
+        phi::errors::InvalidArgument("The Dst_index should be 1D, "
+                                     "when it is not 2D, but we get %d",
+                                     dst_index_dims.size()));
+  }
+
+  PADDLE_ENFORCE_EQ(src_index_dims[0],
+                    dst_index_dims[0],
+                    phi::errors::InvalidArgument(
+                        "Src_index and Dst_index should have the same shape."));
+
+  auto dims = x.dims();
+  std::vector<int64_t> dims_ = phi::vectorize(dims);
+  dims_[0] = -1;
+  out->set_dims(phi::make_ddim(dims_));
+  out->set_dtype(x.dtype());
+
+  if (reduce_op == "MEAN") {
+    dst_count->set_dims({-1});
+    dst_count->set_dtype(DataType::INT32);
+  }
+}
+
 void SpectralNormInferMeta(const MetaTensor& weight,
                            const MetaTensor& u,
                            const MetaTensor& v,
diff --git a/paddle/phi/infermeta/ternary.h b/paddle/phi/infermeta/ternary.h
index e0b1573e16..1d0e7e8744 100644
--- a/paddle/phi/infermeta/ternary.h
+++ b/paddle/phi/infermeta/ternary.h
@@ -72,14 +72,6 @@ void InstanceNormInferMeta(const MetaTensor& x,
                            MetaTensor* saved_variance,
                            MetaConfig config = MetaConfig());
 
-void SendURecvInferMeta(const MetaTensor& x,
-                        const MetaTensor& src_index,
-                        const MetaTensor& dst_index,
-                        const std::string& reduce_op,
-                        const IntArray& out_size,
-                        MetaTensor* out,
-                        MetaTensor* dst_count);
-
 void GroupNormInferMeta(const MetaTensor& x,
                         const MetaTensor& scale,
                         const MetaTensor& bias,
@@ -186,6 +178,14 @@ void ScatterNdAddInferMeta(const MetaTensor& x,
                            const MetaTensor& updates,
                            MetaTensor* out);
 
+void SendURecvInferMeta(const MetaTensor& x,
+                        const MetaTensor& src_index,
+                        const MetaTensor& dst_index,
+                        const std::string& reduce_op,
+                        const IntArray& out_size,
+                        MetaTensor* out,
+                        MetaTensor* dst_count);
+
 void SpectralNormInferMeta(const MetaTensor& weight,
                            const MetaTensor& u,
                            const MetaTensor& v,
diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc
index d83477de96..bfc769032d 100644
--- a/paddle/phi/infermeta/unary.cc
+++ b/paddle/phi/infermeta/unary.cc
@@ -1204,6 +1204,211 @@ void FlipInferMeta(const MetaTensor& x,
   out->share_lod(x);
 }
 
+void FoldInferMeta(const MetaTensor& x,
+                   const std::vector<int>& output_sizes,
+                   const std::vector<int>& kernel_sizes,
+                   const std::vector<int>& strides,
+                   const std::vector<int>& paddings,
+                   const std::vector<int>& dilations,
+                   MetaTensor* out) {
+  auto in_dims = x.dims();
+
+  PADDLE_ENFORCE_EQ(
+      output_sizes.size(),
+      2,
+      phi::errors::InvalidArgument(
+          "It is expected output_size equals to 2, but got size %d",
+          output_sizes.size()));
+  PADDLE_ENFORCE_EQ(
+      kernel_sizes.size(),
+      2,
+      phi::errors::InvalidArgument(
+          "It is expected kernel_size equals to 2, but got size %d",
+          kernel_sizes.size()));
+  PADDLE_ENFORCE_EQ(
+      strides.size(),
+      2,
+      phi::errors::InvalidArgument(
+          "It is expected strides_size equals to 2, but got size %d",
+          strides.size()));
+  PADDLE_ENFORCE_EQ(
+      paddings.size(),
+      4,
+      phi::errors::InvalidArgument(
+          "It is expected paddings_size equals to 4, but got size %d",
+          paddings.size()));
+
+  PADDLE_ENFORCE_EQ(
+      dilations.size(),
+      2,
+      phi::errors::InvalidArgument(
+          "It is expected dilations_size equals to 2, but got size %d",
+          dilations.size()));
+
+  int output_height = output_sizes[0];
+  int output_width = output_sizes[1];
+  int kernel_height = kernel_sizes[0];
+  int kernel_width = kernel_sizes[1];
+  int dilation_height = dilations[0];
+  int dilation_width = dilations[1];
+  int stride_height = strides[0];
+  int stride_width = strides[1];
+
+  // check kernel_sizes
+  PADDLE_ENFORCE_GT(kernel_height,
+                    0,
+                    phi::errors::InvalidArgument(
+                        "The `kernel_sizes` should be greater than zero, "
+                        "but received kernel_height: %d kernel_width: %d.",
+                        kernel_sizes[0],
+                        kernel_sizes[1]));
+  PADDLE_ENFORCE_GT(kernel_width,
+                    0,
+                    phi::errors::InvalidArgument(
+                        "The `kernel_sizes` should be greater than zero, "
+                        "but received kernel_height: %d kernel_width: %d.",
+                        kernel_sizes[0],
+                        kernel_sizes[1]));
+  // check strides
+  PADDLE_ENFORCE_GT(stride_height,
+                    0,
+                    phi::errors::InvalidArgument(
+                        "The `strides` should be greater than zero, "
+                        "but received strides_height: %d strides_width: %d.",
+                        strides[0],
+                        strides[1]));
+  PADDLE_ENFORCE_GT(stride_width,
+                    0,
+                    phi::errors::InvalidArgument(
+                        "The `strides` should be greater than zero, "
+                        "but received strides_height: %d strides_width: %d.",
+                        strides[0],
+                        strides[1]));
+  // check output size
+  PADDLE_ENFORCE_GT(output_height,
+                    1,
+                    phi::errors::InvalidArgument(
+                        "The `output_height` should be greater than one, "
+                        "but received output_height: %d .",
+                        output_height));
+  PADDLE_ENFORCE_GT(output_width,
+                    1,
+                    phi::errors::InvalidArgument(
+                        "The `output_width` should be greater than one, "
+                        "but received output_width: %d .",
+                        output_width));
+  // check dilations
+  PADDLE_ENFORCE_GT(
+      dilation_height,
+      0,
+      phi::errors::InvalidArgument(
+          "The `dilations` should be greater than zero, "
+          "but received dilations_height: %d dilations_width: %d.",
+          dilations[0],
+          dilations[1]));
+  PADDLE_ENFORCE_GT(
+      dilation_width,
+      0,
+      phi::errors::InvalidArgument(
+          "The `dilations` should be greater than zero, "
+          "but received dilations_height: %d dilations_width: %d.",
+          dilations[0],
+          dilations[1]));
+
+  std::vector<int> out_dims;
+  // batch_size
+  out_dims.push_back(in_dims[0]);
+  // output_plane
+  int output_channels = in_dims[1] / (kernel_width * kernel_height);
+  out_dims.push_back(output_channels);
+
+  int blocks_height = (output_sizes[0] + 2 * paddings[0] -
+                       (dilations[0] * (kernel_sizes[0] - 1) + 1)) /
+                          strides[0] +
+                      1;
+  int blocks_width = (output_sizes[1] + 2 * paddings[1] -
+                      (dilations[1] * (kernel_sizes[1] - 1) + 1)) /
+                         strides[1] +
+                     1;
+
+  // check the number of sliding blocks along height and width
+  PADDLE_ENFORCE_GT(
+      blocks_height,
+      0,
+      phi::errors::InvalidArgument(
+          "The sliding blocks calculated from input spatial size (%d, %d), "
+          "kernel_sizes (%d, %d), strides (%d, %d), dilations (%d, %d), "
+          "is (%d, %d), which should be a positive integer.",
+          in_dims[2],
+          in_dims[3],
+          kernel_sizes[0],
+          kernel_sizes[1],
+          strides[0],
+          strides[1],
+          dilations[0],
+          dilations[1],
+          output_height,
+          output_width));
+
+  PADDLE_ENFORCE_GT(
+      blocks_width,
+      0,
+      phi::errors::InvalidArgument(
+          "The sliding blocks calculated from input spatial size (%d, %d), "
+          "kernel_sizes (%d, %d), strides (%d, %d), dilations (%d, %d), "
+          "is (%d, %d), which should be a positive integer.",
+          in_dims[2],
+          in_dims[3],
+          kernel_sizes[0],
+          kernel_sizes[1],
+          strides[0],
+          strides[1],
+          dilations[0],
+          dilations[1],
+          output_height,
+          output_width));
+
+  PADDLE_ENFORCE_EQ(
+      blocks_height * blocks_width,
+      in_dims[2],
+      phi::errors::InvalidArgument(
+          "Given input output_size (%d, %d), "
+          "kernel_sizes (%d, %d), strides (%d, %d), dilations (%d, %d), "
+          "which should be expected size of input's dimension "
+          "2 to match the calculated number of %d * %d = %d, but got %d",
+          output_height,
+          output_width,
+          kernel_sizes[0],
+          kernel_sizes[1],
+          strides[0],
+          strides[1],
+          dilations[0],
+          dilations[1],
+          blocks_height,
+          blocks_width,
+          blocks_height * blocks_width,
+          in_dims[2]));
+
+  PADDLE_ENFORCE_EQ(
+      in_dims[1] % (kernel_sizes[0] * kernel_sizes[1]),
+      0,
+      phi::errors::InvalidArgument(
+          "Expected size of input's dimension 1 to be divisible by the"
+          "product of kernel_size, but got input.size(1)=%d and "
+          "kernel_size=( %d"
+          ", %d).",
+          in_dims[1],
+          kernel_sizes[0],
+          kernel_sizes[1]));
+
+  out_dims.push_back(output_height);
+  out_dims.push_back(output_width);
+  if (out != nullptr) {
+    out->set_dims(phi::make_ddim(out_dims));
+    out->set_dtype(x.dtype());
+  }
+}
+
 void FrameInferMeta(const MetaTensor& x,
                     int frame_length,
                     int hop_length,
@@ -1327,6 +1532,18 @@ void HistogramInferMeta(
   out->share_lod(input);
 }
 
+void IdentityLossInferMeta(const MetaTensor& x,
+                           int reduction,
+                           MetaTensor* out) {
+  if (reduction == 2) {
+    out->set_dtype(x.dtype());
+    out->set_dims(x.dims());
+  } else {
+    out->set_dims(phi::make_ddim({1}));
+    out->set_dtype(x.dtype());
+  }
+}
+
 void IncrementInferMeta(const MetaTensor& x, float value, MetaTensor* out) {
   PADDLE_ENFORCE_EQ(
       product(x.dims()),
@@ -2036,6 +2253,17 @@ void NMSInferMeta(const MetaTensor& x, float threshold, MetaTensor* out) {
   out->set_dtype(DataType::INT64);
 }
 
+void NonZeroInferMeta(const MetaTensor& condition, MetaTensor* out) {
+  auto rank = condition.dims().size();
+  PADDLE_ENFORCE_GE(
+      rank,
+      1UL,
+      phi::errors::InvalidArgument(
+          "Input(Condition) should have number of dimension at least 1"));
+  out->set_dims(phi::make_ddim({-1, rank}));
+  out->set_dtype(DataType::INT64);
+}
+
 void NormInferMeta(const MetaTensor& x,
                    int axis,
                    float epsilon,
@@ -2054,16 +2282,53 @@ void NormInferMeta(const MetaTensor& x,
   }
 }
 
-void OverlapAddInferMeta(const MetaTensor& x,
-                         int hop_length,
-                         int axis,
-                         MetaTensor* out,
-                         MetaConfig config) {
-  const auto x_dims = x.dims();
-  const int x_rank = x_dims.size();
+void OneHotRawInferMeta(const MetaTensor& x,
+                        const Scalar& depth,
+                        DataType dtype,
+                        bool allow_out_of_range,
+                        MetaTensor* out) {
+  auto x_dims = x.dims();
+  PADDLE_ENFORCE_GE(
+      x_dims.size(),
+      1,
+      phi::errors::InvalidArgument("Rank of Input(X) should be at least 1."));
+  auto out_dims_vec = phi::vectorize(x_dims);
+  out_dims_vec.push_back(depth.to<int>());
+  auto out_dims = phi::make_ddim(out_dims_vec);
+  out->set_dims(out_dims);
+  out->share_lod(x);
+  out->set_dtype(dtype);
+}
 
+void OneHotInferMeta(const MetaTensor& x,
+                     const Scalar& depth_t,
+                     MetaTensor* out) {
+  auto x_dims = x.dims();
   PADDLE_ENFORCE_GE(
-      x_rank,
+      x_dims.size(),
+      1,
+      phi::errors::InvalidArgument("Rank of Input(X) should be at least 1."));
+
+  int depth = depth_t.to<int>();
+  auto out_dims_vec = phi::vectorize(x_dims);
+  out_dims_vec.push_back(depth);
+  auto out_dims = phi::make_ddim(out_dims_vec);
+  out->set_dims(out_dims);
+  out->share_lod(x);
+
+  out->set_dtype(phi::DataType::FLOAT32);
+}
+
+void OverlapAddInferMeta(const MetaTensor& x,
+                         int hop_length,
+                         int axis,
+                         MetaTensor* out,
+                         MetaConfig config) {
+  const auto x_dims = x.dims();
+  const int x_rank = x_dims.size();
+
+  PADDLE_ENFORCE_GE(
+      x_rank,
       2,
       errors::InvalidArgument(
           "Input(X) of OverlapAddOp should be a tensor which contains "
@@ -3956,10 +4221,10 @@ void UnbindInferMeta(const MetaTensor& x,
   }
 }
 
-void TrilTriuInferMeta(const MetaTensor& x,
-                       int diagonal,
-                       bool lower,
-                       MetaTensor* out) {
+void TrilInferMeta(const MetaTensor& x,
+                   int diagonal,
+                   bool lower,
+                   MetaTensor* out) {
   const auto& x_dims = x.dims();
   PADDLE_ENFORCE_GE(x_dims.size(),
                     2,
@@ -4442,54 +4707,6 @@ void UnStackInferMeta(const MetaTensor& x,
   }
 }
 
-void OneHotRawInferMeta(const MetaTensor& x,
-                        const Scalar& depth,
-                        DataType dtype,
-                        bool allow_out_of_range,
-                        MetaTensor* out) {
-  auto x_dims = x.dims();
-  PADDLE_ENFORCE_GE(
-      x_dims.size(),
-      1,
-      phi::errors::InvalidArgument("Rank of Input(X) should be at least 1."));
-  auto out_dims_vec = phi::vectorize(x_dims);
-  out_dims_vec.push_back(depth.to<int>());
-  auto out_dims = phi::make_ddim(out_dims_vec);
-  out->set_dims(out_dims);
-  out->share_lod(x);
-  out->set_dtype(dtype);
-}
-
-void OneHotInferMeta(const MetaTensor& x,
-                     const Scalar& depth_t,
-                     MetaTensor* out) {
-  auto x_dims = x.dims();
-  PADDLE_ENFORCE_GE(
-      x_dims.size(),
-      1,
-      phi::errors::InvalidArgument("Rank of Input(X) should be at least 1."));
-
-  int depth = depth_t.to<int>();
-  auto out_dims_vec = phi::vectorize(x_dims);
-  out_dims_vec.push_back(depth);
-  auto out_dims = phi::make_ddim(out_dims_vec);
-  out->set_dims(out_dims);
-  out->share_lod(x);
-
-  out->set_dtype(phi::DataType::FLOAT32);
-}
-
-void WhereIndexInferMeta(const MetaTensor& condition, MetaTensor* out) {
-  auto rank = condition.dims().size();
-  PADDLE_ENFORCE_GE(
-      rank,
-      1UL,
-      phi::errors::InvalidArgument(
-          "Input(Condition) should have number of dimension at least 1"));
-  out->set_dims(phi::make_ddim({-1, rank}));
-  out->set_dtype(DataType::INT64);
-}
-
 void ChannelShuffleInferMeta(const MetaTensor& x,
                              int groups,
                              const std::string& data_format,
@@ -4536,223 +4753,6 @@ void ChannelShuffleInferMeta(const MetaTensor& x,
   out->set_dims(output_dims);
 }
 
-void IdentityLossInferMeta(const MetaTensor& x,
-                           int reduction,
-                           MetaTensor* out) {
-  if (reduction == 2) {
-    out->set_dtype(x.dtype());
-    out->set_dims(x.dims());
-  } else {
-    out->set_dims(phi::make_ddim({1}));
-    out->set_dtype(x.dtype());
-  }
-}
-
-void FoldInferMeta(const MetaTensor& x,
-                   const std::vector<int>& output_sizes,
-                   const std::vector<int>& kernel_sizes,
-                   const std::vector<int>& strides,
-                   const std::vector<int>& paddings,
-                   const std::vector<int>& dilations,
-                   MetaTensor* out) {
-  auto in_dims = x.dims();
-
-  PADDLE_ENFORCE_EQ(
-      output_sizes.size(),
-      2,
-      phi::errors::InvalidArgument(
-          "It is expected output_size equals to 2, but got size %d",
-          output_sizes.size()));
-  PADDLE_ENFORCE_EQ(
-      kernel_sizes.size(),
-      2,
-      phi::errors::InvalidArgument(
-          "It is expected kernel_size equals to 2, but got size %d",
-          kernel_sizes.size()));
-  PADDLE_ENFORCE_EQ(
-      strides.size(),
-      2,
-      phi::errors::InvalidArgument(
-          "It is expected strides_size equals to 2, but got size %d",
-          strides.size()));
-  PADDLE_ENFORCE_EQ(
-      paddings.size(),
-      4,
-      phi::errors::InvalidArgument(
-          "It is expected paddings_size equals to 4, but got size %d",
-          paddings.size()));
-
-  PADDLE_ENFORCE_EQ(
-      dilations.size(),
-      2,
-      phi::errors::InvalidArgument(
-          "It is expected dilations_size equals to 2, but got size %d",
-          dilations.size()));
-
-  int output_height = output_sizes[0];
-  int output_width = output_sizes[1];
-  int kernel_height = kernel_sizes[0];
-  int kernel_width = kernel_sizes[1];
-  int dilation_height = dilations[0];
-  int dilation_width = dilations[1];
-  int stride_height = strides[0];
-  int stride_width = strides[1];
-
-  // check kernel_sizes
-  PADDLE_ENFORCE_GT(kernel_height,
-                    0,
-                    phi::errors::InvalidArgument(
-                        "The `kernel_sizes` should be greater than zero, "
-                        "but received kernel_height: %d kernel_width: %d.",
-                        kernel_sizes[0],
-                        kernel_sizes[1]));
-  PADDLE_ENFORCE_GT(kernel_width,
-                    0,
-                    phi::errors::InvalidArgument(
-                        "The `kernel_sizes` should be greater than zero, "
-                        "but received kernel_height: %d kernel_width: %d.",
-                        kernel_sizes[0],
-                        kernel_sizes[1]));
-  // check strides
-  PADDLE_ENFORCE_GT(stride_height,
-                    0,
-                    phi::errors::InvalidArgument(
-                        "The `strides` should be greater than zero, "
-                        "but received strides_height: %d strides_width: %d.",
-                        strides[0],
-                        strides[1]));
-  PADDLE_ENFORCE_GT(stride_width,
-                    0,
-                    phi::errors::InvalidArgument(
-                        "The `strides` should be greater than zero, "
-                        "but received strides_height: %d strides_width: %d.",
-                        strides[0],
-                        strides[1]));
-  // check dilations
-  PADDLE_ENFORCE_GT(output_height,
-                    1,
-                    phi::errors::InvalidArgument(
-                        "The `output_height` should be greater than one, "
-                        "but received output_height: %d .",
-                        output_height));
-  PADDLE_ENFORCE_GT(output_width,
-                    1,
-                    phi::errors::InvalidArgument(
-                        "The `output_width` should be greater than one, "
-                        "but received output_width: %d .",
-                        output_width));
-  // check output size
-  PADDLE_ENFORCE_GT(
-      dilation_height,
-      0,
-      phi::errors::InvalidArgument(
-          "The `dilations` should be greater than zero, "
-          "but received dilations_height: %d dilations_width: %d.",
-          dilations[0],
-          dilations[1]));
-  PADDLE_ENFORCE_GT(
-      dilation_width,
-      0,
-      phi::errors::InvalidArgument(
-          "The `dilations` should be greater than zero, "
-          "but received dilations_height: %d dilations_width: %d.",
-          dilations[0],
-          dilations[1]));
-
-  std::vector<int> out_dims;
-  // batch_size
-  out_dims.push_back(in_dims[0]);
-  // output_plane
-  int output_channels = in_dims[1] / (kernel_width * kernel_height);
-  out_dims.push_back(output_channels);
-
-  int blocks_height = (output_sizes[0] + 2 * paddings[0] -
-                       (dilations[0] * (kernel_sizes[0] - 1) + 1)) /
-                          strides[0] +
-                      1;
-  int blocks_width = (output_sizes[1] + 2 * paddings[1] -
-                      (dilations[1] * (kernel_sizes[1] - 1) + 1)) /
-                         strides[1] +
-                     1;
-
-  // check output height and width
-  PADDLE_ENFORCE_GT(
-      blocks_height,
-      0,
-      phi::errors::InvalidArgument(
-          "The sliding blocks calculated from input spatial size (%d, %d), "
-          "kernel_sizes (%d, %d), strides (%d, %d), dilations (%d, %d), "
-          "is (%d, %d), which should be a positive integer.",
-          in_dims[2],
-          in_dims[3],
-          kernel_sizes[0],
-          kernel_sizes[1],
-          strides[0],
-          strides[1],
-          dilations[0],
-          dilations[1],
-          output_height,
-          output_width));
-
-  PADDLE_ENFORCE_GT(
-      blocks_width,
-      0,
-      phi::errors::InvalidArgument(
-          "The sliding blocks calculated from input spatial size (%d, %d), "
-          "kernel_sizes (%d, %d), strides (%d, %d), dilations (%d, %d), "
-          "is (%d, %d), which should be a positive integer.",
-          in_dims[2],
-          in_dims[3],
-          kernel_sizes[0],
-          kernel_sizes[1],
-          strides[0],
-          strides[1],
-          dilations[0],
-          dilations[1],
-          output_height,
-          output_width));
-
-  PADDLE_ENFORCE_EQ(
-      blocks_height * blocks_width,
-      in_dims[2],
-      phi::errors::InvalidArgument(
-          "Given input output_size (%d, %d), "
-          "kernel_sizes (%d, %d), strides (%d, %d), dilations (%d, %d), "
-          "which should be expected size of input's dimension "
-          "2 to match the calculated number of %d * %d = %d, but got %d",
-          output_height,
-          output_width,
-          kernel_sizes[0],
-          kernel_sizes[1],
-          strides[0],
-          strides[1],
-          dilations[0],
-          dilations[1],
-          blocks_height,
-          blocks_width,
-          blocks_height * blocks_width,
-          in_dims[2]));
-
-  PADDLE_ENFORCE_EQ(
-      in_dims[1] % (kernel_sizes[0] * kernel_sizes[1]),
-      0,
-      phi::errors::InvalidArgument(
-          "Expected size of input's dimension 1 to be divisible by the"
-          "product of kernel_size, but got input.size(1)=%d and "
-          "kernel_size=( %d"
-          ", %d).",
-          in_dims[1],
-          kernel_sizes[0],
-          kernel_sizes[1]));
-
-  out_dims.push_back(output_height);
-  out_dims.push_back(output_width);
-  if (out != nullptr) {
-    out->set_dims(phi::make_ddim(out_dims));
-    out->set_dtype(x.dtype());
-  }
-}
-
 }  // namespace phi
 
 PD_REGISTER_INFER_META_FN(flatten, phi::FlattenInferMeta);
diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h
index 66f72a681e..153b2b8f5f 100644
--- a/paddle/phi/infermeta/unary.h
+++ b/paddle/phi/infermeta/unary.h
@@ -65,6 +65,11 @@ void BatchSizeLikeInferMeta(const MetaTensor& x,
 
 void CastInferMeta(const MetaTensor& x, DataType out_dtype, MetaTensor* out);
 
+void ChannelShuffleInferMeta(const MetaTensor& x,
+                             int groups,
+                             const std::string& data_format,
+                             MetaTensor* out);
+
 void CholeskyInferMeta(const MetaTensor& x, bool upper, MetaTensor* out);
 
 void ClassCenterSampleInferMeta(const MetaTensor& label,
@@ -191,6 +196,14 @@ void FlipInferMeta(const MetaTensor& x,
                    const std::vector<int>& axis,
                    MetaTensor* out);
 
+void FoldInferMeta(const MetaTensor& x,
+                   const std::vector<int>& output_sizes,
+                   const std::vector<int>& kernel_sizes,
+                   const std::vector<int>& strides,
+                   const std::vector<int>& paddings,
+                   const std::vector<int>& dilations,
+                   MetaTensor* out);
+
 void FrameInferMeta(const MetaTensor& x,
                     int frame_length,
                     int hop_length,
@@ -214,6 +227,8 @@ void GumbelSoftmaxInferMeta(const MetaTensor& x,
 void HistogramInferMeta(
     const MetaTensor& input, int64_t bins, int min, int max, MetaTensor* out);
 
+void IdentityLossInferMeta(const MetaTensor& x, int reduction, MetaTensor* out);
+
 void IncrementInferMeta(const MetaTensor& x, float value, MetaTensor* out);
 
 void InferMetaFromVecValue(const MetaTensor& x,
@@ -288,6 +303,8 @@ void NanmedianInferMeta(const MetaTensor& x,
                         MetaTensor* out,
                         MetaTensor* median_index);
 
+void NonZeroInferMeta(const MetaTensor& condition, MetaTensor* out);
+
 void NMSInferMeta(const MetaTensor& x, float threshold, MetaTensor* out);
 
 void NormInferMeta(const MetaTensor& x,
@@ -297,6 +314,14 @@ void NormInferMeta(const MetaTensor& x,
                    MetaTensor* out,
                    MetaTensor* norm);
 
+void OneHotRawInferMeta(const MetaTensor& x,
+                        const Scalar& depth,
+                        DataType dtype,
+                        bool allow_out_of_range,
+                        MetaTensor* out);
+
+void OneHotInferMeta(const MetaTensor& x, const Scalar& depth, MetaTensor* out);
+
 void OverlapAddInferMeta(const MetaTensor& x,
                          int hop_length,
                          int axis,
@@ -576,10 +601,10 @@ void TransposeGradInferMeta(const MetaTensor& x,
                             const std::vector<int>& axis,
                             MetaTensor* out);
 
-void TrilTriuInferMeta(const MetaTensor& x,
-                       int diagonal,
-                       bool lower,
-                       MetaTensor* out);
+void TrilInferMeta(const MetaTensor& x,
+                   int diagonal,
+                   bool lower,
+                   MetaTensor* out);
 
 void UnbindInferMeta(const MetaTensor& x,
                      int axis,
@@ -657,29 +682,4 @@ void UnStackInferMeta(const MetaTensor& x,
                       int num,
                       std::vector<MetaTensor*> outs);
 
-void OneHotRawInferMeta(const MetaTensor& x,
-                        const Scalar& depth,
-                        DataType dtype,
-                        bool allow_out_of_range,
-                        MetaTensor* out);
-
-void OneHotInferMeta(const MetaTensor& x, const Scalar& depth, MetaTensor* out);
-
-void WhereIndexInferMeta(const MetaTensor& condition, MetaTensor* out);
-
-void ChannelShuffleInferMeta(const MetaTensor& x,
-                             int groups,
-                             const std::string& data_format,
-                             MetaTensor* out);
-
-void IdentityLossInferMeta(const MetaTensor& x, int reduction, MetaTensor* out);
-
-void FoldInferMeta(const MetaTensor& x,
-                   const std::vector<int>& output_sizes,
-                   const std::vector<int>& kernel_sizes,
-                   const std::vector<int>& strides,
-                   const std::vector<int>& paddings,
-                   const std::vector<int>& dilations,
-                   MetaTensor* out);
-
 }  // namespace phi
diff --git a/paddle/phi/kernels/cpu/hierarchical_sigmoid_grad_kernel.cc b/paddle/phi/kernels/cpu/hierarchical_sigmoid_grad_kernel.cc
deleted file mode 100644
index eee4525293..0000000000
--- a/paddle/phi/kernels/cpu/hierarchical_sigmoid_grad_kernel.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/phi/kernels/hierarchical_sigmoid_grad_kernel.h"
-
-#include "paddle/phi/backends/cpu/cpu_context.h"
-#include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/cpu/hierarchical_sigmoid_grad.h"
-
-namespace phi {
-
-template <typename T, typename Context>
-void HierarchicalSigmoidGradKernel(const Context& ctx,
-                                   const DenseTensor& x,
-                                   const DenseTensor& w,
-                                   const DenseTensor& label,
-                                   const paddle::optional<DenseTensor>& path,
-                                   const paddle::optional<DenseTensor>& code,
-                                   const paddle::optional<DenseTensor>& bias,
-                                   const DenseTensor& pre_out,
-                                   const DenseTensor& out_grad,
-                                   int num_classes,
-                                   bool remote_prefetch,
-                                   int trainer_id,
-                                   const std::vector<int64_t>& height_sections,
-                                   const std::vector<std::string>& epmap,
-                                   const std::vector<std::string>& table_names,
-                                   bool is_sparse,
-                                   DenseTensor* x_grad,
-                                   DenseTensor* w_grad,
-                                   DenseTensor* bias_grad) {
-  HierarchicalSigmoidGradKernelImpl<T>(ctx,
-                                       x,
-                                       w,
-                                       label,
-                                       path,
-                                       code,
-                                       bias,
-                                       pre_out,
-                                       out_grad,
-                                       num_classes,
-                                       remote_prefetch,
-                                       trainer_id,
-                                       height_sections,
-                                       epmap,
-                                       table_names,
-                                       is_sparse,
-                                       x_grad,
-                                       w_grad,
-                                       bias_grad);
-}
-
-}  // namespace phi
-
-PD_REGISTER_KERNEL(hierarchical_sigmoid_grad,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::HierarchicalSigmoidGradKernel,
-                   float,
-                   double) {}
diff --git a/paddle/phi/kernels/cpu/hierarchical_sigmoid_grad.h b/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h
similarity index 71%
rename from paddle/phi/kernels/cpu/hierarchical_sigmoid_grad.h
rename to paddle/phi/kernels/cpu/hsigmoid_loss_grad.h
index 9b38095f25..7e6693c4dd 100644
--- a/paddle/phi/kernels/cpu/hierarchical_sigmoid_grad.h
+++ b/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h
@@ -26,27 +26,26 @@ namespace phi {
 namespace math = paddle::operators::math;
 
 template <typename T, typename Context>
-void HierarchicalSigmoidGradKernelImpl(
-    const Context& ctx,
-    const DenseTensor& x,
-    const DenseTensor& w,
-    const DenseTensor& label,
-    const paddle::optional<DenseTensor>& path,
-    const paddle::optional<DenseTensor>& code,
-    const paddle::optional<DenseTensor>& bias,
-    const DenseTensor& pre_out,
-    const DenseTensor& out_grad,
-    int num_classes,
-    bool remote_prefetch,
-    int trainer_id,
-    const std::vector<int64_t>& height_sections,
-    const std::vector<std::string>& epmap,
-    const std::vector<std::string>& table_names,
-    bool is_sparse,
-    DenseTensor* x_grad,
-    DenseTensor* w_grad,
-    DenseTensor* bias_grad,
-    SelectedRows* w_grad_sr = nullptr) {
+void HSigmoidLossGradKernelImpl(const Context& ctx,
+                                const DenseTensor& x,
+                                const DenseTensor& w,
+                                const DenseTensor& label,
+                                const paddle::optional<DenseTensor>& path,
+                                const paddle::optional<DenseTensor>& code,
+                                const paddle::optional<DenseTensor>& bias,
+                                const DenseTensor& pre_out,
+                                const DenseTensor& out_grad,
+                                int num_classes,
+                                bool remote_prefetch,
+                                int trainer_id,
+                                const std::vector<int64_t>& height_sections,
+                                const std::vector<std::string>& epmap,
+                                const std::vector<std::string>& table_names,
+                                bool is_sparse,
+                                DenseTensor* x_grad,
+                                DenseTensor* w_grad,
+                                DenseTensor* bias_grad,
+                                SelectedRows* w_grad_sr = nullptr) {
   funcs::SetConstant<Context, T> zero;
   DenseTensor pre_out_grad;
 
diff --git a/paddle/phi/kernels/cpu/hsigmoid_loss_grad_kernel.cc b/paddle/phi/kernels/cpu/hsigmoid_loss_grad_kernel.cc
new file mode 100644
index 0000000000..efb59d1f48
--- /dev/null
+++ b/paddle/phi/kernels/cpu/hsigmoid_loss_grad_kernel.cc
@@ -0,0 +1,71 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/hsigmoid_loss_grad_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/cpu/hsigmoid_loss_grad.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void HSigmoidLossGradKernel(const Context& ctx,
+                            const DenseTensor& x,
+                            const DenseTensor& w,
+                            const DenseTensor& label,
+                            const paddle::optional<DenseTensor>& path,
+                            const paddle::optional<DenseTensor>& code,
+                            const paddle::optional<DenseTensor>& bias,
+                            const DenseTensor& pre_out,
+                            const DenseTensor& out_grad,
+                            int num_classes,
+                            bool remote_prefetch,
+                            int trainer_id,
+                            const std::vector<int64_t>& height_sections,
+                            const std::vector<std::string>& epmap,
+                            const std::vector<std::string>& table_names,
+                            bool is_sparse,
+                            DenseTensor* x_grad,
+                            DenseTensor* w_grad,
+                            DenseTensor* bias_grad) {
+  HSigmoidLossGradKernelImpl<T>(ctx,
+                                x,
+                                w,
+                                label,
+                                path,
+                                code,
+                                bias,
+                                pre_out,
+                                out_grad,
+                                num_classes,
+                                remote_prefetch,
+                                trainer_id,
+                                height_sections,
+                                epmap,
+                                table_names,
+                                is_sparse,
+                                x_grad,
+                                w_grad,
+                                bias_grad);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(hsigmoid_loss_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::HSigmoidLossGradKernel,
+                   float,
+                   double) {}
diff --git a/paddle/phi/kernels/cpu/hierarchical_sigmoid_kernel.cc b/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc
similarity index 72%
rename from paddle/phi/kernels/cpu/hierarchical_sigmoid_kernel.cc
rename to paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc
index 7c3421e88d..fa0c83031d 100644
--- a/paddle/phi/kernels/cpu/hierarchical_sigmoid_kernel.cc
+++ b/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/hierarchical_sigmoid_kernel.h"
+#include "paddle/phi/kernels/hsigmoid_loss_kernel.h"
 
 #include "paddle/fluid/operators/math/matrix_bit_code.h"
 #include "paddle/fluid/platform/transform.h"
@@ -28,23 +28,23 @@ namespace phi {
 namespace math = paddle::operators::math;
 
 template <typename T, typename Context>
-void HierarchicalSigmoidKernel(const Context& ctx,
-                               const DenseTensor& x,
-                               const DenseTensor& w,
-                               const DenseTensor& label,
-                               const paddle::optional<DenseTensor>& path,
-                               const paddle::optional<DenseTensor>& code,
-                               const paddle::optional<DenseTensor>& bias,
-                               int num_classes,
-                               bool remote_prefetch,
-                               int trainer_id,
-                               const std::vector<int64_t>& height_sections,
-                               const std::vector<std::string>& epmap,
-                               const std::vector<std::string>& table_names,
-                               bool is_sparse,
-                               DenseTensor* out,
-                               DenseTensor* pre_out,
-                               DenseTensor* w_out) {
+void HSigmoidLossKernel(const Context& ctx,
+                        const DenseTensor& x,
+                        const DenseTensor& w,
+                        const DenseTensor& label,
+                        const paddle::optional<DenseTensor>& path,
+                        const paddle::optional<DenseTensor>& code,
+                        const paddle::optional<DenseTensor>& bias,
+                        int num_classes,
+                        bool remote_prefetch,
+                        int trainer_id,
+                        const std::vector<int64_t>& height_sections,
+                        const std::vector<std::string>& epmap,
+                        const std::vector<std::string>& table_names,
+                        bool is_sparse,
+                        DenseTensor* out,
+                        DenseTensor* pre_out,
+                        DenseTensor* w_out) {
   size_t num_classes_st = static_cast<size_t>(num_classes);
   // for remote prefetch
 
@@ -106,9 +106,5 @@ void HierarchicalSigmoidKernel(const Context& ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(hierarchical_sigmoid,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::HierarchicalSigmoidKernel,
-                   float,
-                   double) {}
+PD_REGISTER_KERNEL(
+    hsigmoid_loss, CPU, ALL_LAYOUT, phi::HSigmoidLossKernel, float, double) {}
diff --git a/paddle/phi/kernels/cpu/where_index_kernel.cc b/paddle/phi/kernels/cpu/nonzero_kernel.cc
similarity index 90%
rename from paddle/phi/kernels/cpu/where_index_kernel.cc
rename to paddle/phi/kernels/cpu/nonzero_kernel.cc
index da6eff7401..fca8e6b09f 100644
--- a/paddle/phi/kernels/cpu/where_index_kernel.cc
+++ b/paddle/phi/kernels/cpu/nonzero_kernel.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/where_index_kernel.h"
+#include "paddle/phi/kernels/nonzero_kernel.h"
 
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -47,9 +47,9 @@ struct WhereIndexFunctor {
 };
 
 template <typename T, typename Context>
-void WhereIndexKernel(const Context& dev_ctx,
-                      const DenseTensor& condition,
-                      DenseTensor* out) {
+void NonZeroKernel(const Context& dev_ctx,
+                   const DenseTensor& condition,
+                   DenseTensor* out) {
   const T* cond_data = condition.data<T>();
   auto numel = condition.numel();
   auto dims = condition.dims();
@@ -83,10 +83,10 @@ void WhereIndexKernel(const Context& dev_ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(where_index,
+PD_REGISTER_KERNEL(nonzero,
                    CPU,
                    ALL_LAYOUT,
-                   phi::WhereIndexKernel,
+                   phi::NonZeroKernel,
                    int64_t,
                    int,
                    int16_t,
diff --git a/paddle/phi/kernels/cpu/reduce_prod_grad_kernel.cc b/paddle/phi/kernels/cpu/prod_grad_kernel.cc
similarity index 84%
rename from paddle/phi/kernels/cpu/reduce_prod_grad_kernel.cc
rename to paddle/phi/kernels/cpu/prod_grad_kernel.cc
index bec6deb907..f602a8e078 100644
--- a/paddle/phi/kernels/cpu/reduce_prod_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/prod_grad_kernel.cc
@@ -12,16 +12,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/reduce_prod_grad_kernel.h"
+#include "paddle/phi/kernels/prod_grad_kernel.h"
 
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h"
+#include "paddle/phi/kernels/impl/prod_grad_kernel_impl.h"
 
 PD_REGISTER_KERNEL(prod_grad,
                    CPU,
                    ALL_LAYOUT,
-                   phi::ReduceProdGradKernel,
+                   phi::ProdGradKernel,
                    float,
                    double,
                    int,
diff --git a/paddle/phi/kernels/cpu/reduce_prod_kernel.cc b/paddle/phi/kernels/cpu/prod_kernel.cc
similarity index 96%
rename from paddle/phi/kernels/cpu/reduce_prod_kernel.cc
rename to paddle/phi/kernels/cpu/prod_kernel.cc
index 36766d27ed..af5ea5cb95 100644
--- a/paddle/phi/kernels/cpu/reduce_prod_kernel.cc
+++ b/paddle/phi/kernels/cpu/prod_kernel.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/reduce_prod_kernel.h"
+#include "paddle/phi/kernels/prod_kernel.h"
 
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
diff --git a/paddle/phi/kernels/cpu/tril_triu_kernel.cc b/paddle/phi/kernels/cpu/tril_grad_kernel.cc
similarity index 88%
rename from paddle/phi/kernels/cpu/tril_triu_kernel.cc
rename to paddle/phi/kernels/cpu/tril_grad_kernel.cc
index f3599bb92b..fba457424f 100644
--- a/paddle/phi/kernels/cpu/tril_triu_kernel.cc
+++ b/paddle/phi/kernels/cpu/tril_grad_kernel.cc
@@ -14,12 +14,12 @@
 
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/impl/tril_triu_kernel_impl.h"
+#include "paddle/phi/kernels/impl/tril_grad_kernel_impl.h"
 
-PD_REGISTER_KERNEL(tril_triu,
+PD_REGISTER_KERNEL(tril_grad,
                    CPU,
                    ALL_LAYOUT,
-                   phi::TrilTriuKernel,
+                   phi::TrilGradKernel,
                    bool,
                    float,
                    double,
diff --git a/paddle/phi/kernels/cpu/tril_triu_grad_kernel.cc b/paddle/phi/kernels/cpu/tril_kernel.cc
similarity index 86%
rename from paddle/phi/kernels/cpu/tril_triu_grad_kernel.cc
rename to paddle/phi/kernels/cpu/tril_kernel.cc
index 660254fef8..82902a1977 100644
--- a/paddle/phi/kernels/cpu/tril_triu_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/tril_kernel.cc
@@ -14,12 +14,12 @@
 
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/impl/tril_triu_grad_kernel_impl.h"
+#include "paddle/phi/kernels/impl/tril_kernel_impl.h"
 
-PD_REGISTER_KERNEL(tril_triu_grad,
+PD_REGISTER_KERNEL(tril,
                    CPU,
                    ALL_LAYOUT,
-                   phi::TrilTriuGradKernel,
+                   phi::TrilKernel,
                    bool,
                    float,
                    double,
diff --git a/paddle/phi/kernels/cpu/uniform_random_inplace_grad_kernel.cc b/paddle/phi/kernels/cpu/uniform_inplace_grad_kernel.cc
similarity index 59%
rename from paddle/phi/kernels/cpu/uniform_random_inplace_grad_kernel.cc
rename to paddle/phi/kernels/cpu/uniform_inplace_grad_kernel.cc
index d448312949..6358ff9962 100644
--- a/paddle/phi/kernels/cpu/uniform_random_inplace_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/uniform_inplace_grad_kernel.cc
@@ -12,22 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/phi/kernels/uniform_random_inplace_grad_kernel.h"
+#include "paddle/phi/kernels/uniform_inplace_grad_kernel.h"
 
 #include "paddle/phi/core/kernel_registry.h"
 
 namespace phi {
 
 template <typename T, typename Context>
-void UniformRandomInplaceGradKernel(const Context& ctx,
-                                    const DenseTensor& out_grad,
-                                    float min,
-                                    float max,
-                                    int seed,
-                                    int diag_num,
-                                    int diag_step,
-                                    float diag_val,
-                                    DenseTensor* x_grad) {
+void UniformInplaceGradKernel(const Context& ctx,
+                              const DenseTensor& out_grad,
+                              float min,
+                              float max,
+                              int seed,
+                              int diag_num,
+                              int diag_step,
+                              float diag_val,
+                              DenseTensor* x_grad) {
   if (x_grad) {
     auto* data = ctx.template Alloc<T>(x_grad);
     std::fill(data, data + x_grad->numel(), T(0));
@@ -36,9 +36,9 @@ void UniformRandomInplaceGradKernel(const Context& ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(uniform_random_inplace_grad,
+PD_REGISTER_KERNEL(uniform_inplace_grad,
                    CPU,
                    ALL_LAYOUT,
-                   phi::UniformRandomInplaceGradKernel,
+                   phi::UniformInplaceGradKernel,
                    float,
                    double) {}
diff --git a/paddle/phi/kernels/cpu/uniform_random_inplace_kernel.cc b/paddle/phi/kernels/cpu/uniform_inplace_kernel.cc
similarity index 68%
rename from paddle/phi/kernels/cpu/uniform_random_inplace_kernel.cc
rename to paddle/phi/kernels/cpu/uniform_inplace_kernel.cc
index 6e687fbf54..b6801e4caf 100644
--- a/paddle/phi/kernels/cpu/uniform_random_inplace_kernel.cc
+++ b/paddle/phi/kernels/cpu/uniform_inplace_kernel.cc
@@ -12,22 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/phi/kernels/uniform_random_inplace_kernel.h"
+#include "paddle/phi/kernels/uniform_inplace_kernel.h"
 
 #include "paddle/phi/core/kernel_registry.h"
 
 namespace phi {
 
 template <typename T, typename Context>
-void UniformRandomInplaceKernel(const Context& ctx,
-                                const DenseTensor& x,
-                                float min,
-                                float max,
-                                int seed,
-                                int diag_num,
-                                int diag_step,
-                                float diag_val,
-                                DenseTensor* out) {
+void UniformInplaceKernel(const Context& ctx,
+                          const DenseTensor& x,
+                          float min,
+                          float max,
+                          int seed,
+                          int diag_num,
+                          int diag_step,
+                          float diag_val,
+                          DenseTensor* out) {
   T* data = ctx.template Alloc<T>(out);
   int64_t size = out->numel();
   std::uniform_real_distribution<T> dist(static_cast<T>(min),
@@ -46,9 +46,9 @@ void UniformRandomInplaceKernel(const Context& ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(uniform_random_inplace,
+PD_REGISTER_KERNEL(uniform_inplace,
                    CPU,
                    ALL_LAYOUT,
-                   phi::UniformRandomInplaceKernel,
+                   phi::UniformInplaceKernel,
                    float,
                    double) {}
diff --git a/paddle/phi/kernels/cpu/uniform_random_kernel.cc b/paddle/phi/kernels/cpu/uniform_kernel.cc
similarity index 76%
rename from paddle/phi/kernels/cpu/uniform_random_kernel.cc
rename to paddle/phi/kernels/cpu/uniform_kernel.cc
index a4e66a8f64..1b1503473d 100644
--- a/paddle/phi/kernels/cpu/uniform_random_kernel.cc
+++ b/paddle/phi/kernels/cpu/uniform_kernel.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/uniform_random_kernel.h"
+#include "paddle/phi/kernels/uniform_kernel.h"
 
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/uniform_real_distribution.h"
@@ -20,16 +20,16 @@
 namespace phi {
 
 template <typename T, typename Context>
-void UniformRandomRawKernel(const Context &dev_ctx,
-                            const IntArray &shape,
-                            DataType dtype,
-                            const Scalar &min,
-                            const Scalar &max,
-                            int seed,
-                            int diag_num,
-                            int diag_step,
-                            float diag_val,
-                            DenseTensor *out) {
+void UniformRawKernel(const Context &dev_ctx,
+                      const IntArray &shape,
+                      DataType dtype,
+                      const Scalar &min,
+                      const Scalar &max,
+                      int seed,
+                      int diag_num,
+                      int diag_step,
+                      float diag_val,
+                      DenseTensor *out) {
   out->Resize(phi::make_ddim(shape.GetData()));
   T *data = dev_ctx.template Alloc<T>(out);
   auto size = out->numel();
@@ -63,10 +63,10 @@ void UniformRandomRawKernel(const Context &dev_ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(uniform_random_raw,
+PD_REGISTER_KERNEL(uniform_raw,
                    CPU,
                    ALL_LAYOUT,
-                   phi::UniformRandomRawKernel,
+                   phi::UniformRawKernel,
                    float,
                    double,
                    phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/cpu/yolov3_loss_functor.h b/paddle/phi/kernels/cpu/yolo_loss_functor.h
similarity index 100%
rename from paddle/phi/kernels/cpu/yolov3_loss_functor.h
rename to paddle/phi/kernels/cpu/yolo_loss_functor.h
diff --git a/paddle/phi/kernels/cpu/yolov3_loss_grad_kernel.cc b/paddle/phi/kernels/cpu/yolo_loss_grad_kernel.cc
similarity index 85%
rename from paddle/phi/kernels/cpu/yolov3_loss_grad_kernel.cc
rename to paddle/phi/kernels/cpu/yolo_loss_grad_kernel.cc
index bc3beae8b0..647a093472 100644
--- a/paddle/phi/kernels/cpu/yolov3_loss_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/yolo_loss_grad_kernel.cc
@@ -12,14 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/yolov3_loss_grad_kernel.h"
+#include "paddle/phi/kernels/yolo_loss_grad_kernel.h"
 
 #include <algorithm>
 #include <vector>
 
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/cpu/yolov3_loss_functor.h"
+#include "paddle/phi/kernels/cpu/yolo_loss_functor.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace phi {
@@ -117,25 +117,25 @@ static inline void CalcObjnessLossGrad(T* input_grad,
 }
 
 template <typename T, typename Context>
-void Yolov3LossGradKernel(const Context& dev_ctx,
-                          const DenseTensor& x,
-                          const DenseTensor& gt_box,
-                          const DenseTensor& gt_label,
-                          const paddle::optional<DenseTensor>& gt_score,
-                          const DenseTensor& objectness_mask,
-                          const DenseTensor& gt_match_mask,
-                          const DenseTensor& loss_grad,
-                          const std::vector<int>& anchors,
-                          const std::vector<int>& anchor_mask,
-                          int class_num,
-                          float ignore_thresh,
-                          int downsample_ratio,
-                          bool use_label_smooth,
-                          float scale_x_y,
-                          DenseTensor* x_grad,
-                          DenseTensor* gt_box_grad,
-                          DenseTensor* gt_label_grad,
-                          DenseTensor* gt_score_grad) {
+void YoloLossGradKernel(const Context& dev_ctx,
+                        const DenseTensor& x,
+                        const DenseTensor& gt_box,
+                        const DenseTensor& gt_label,
+                        const paddle::optional<DenseTensor>& gt_score,
+                        const DenseTensor& objectness_mask,
+                        const DenseTensor& gt_match_mask,
+                        const DenseTensor& loss_grad,
+                        const std::vector<int>& anchors,
+                        const std::vector<int>& anchor_mask,
+                        int class_num,
+                        float ignore_thresh,
+                        int downsample_ratio,
+                        bool use_label_smooth,
+                        float scale_x_y,
+                        DenseTensor* x_grad,
+                        DenseTensor* gt_box_grad,
+                        DenseTensor* gt_label_grad,
+                        DenseTensor* gt_score_grad) {
   auto* input = &x;
   auto input_grad = x_grad;
   auto* objness_mask = &objectness_mask;
@@ -237,9 +237,5 @@ void Yolov3LossGradKernel(const Context& dev_ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(yolov3_loss_grad,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::Yolov3LossGradKernel,
-                   float,
-                   double) {}
+PD_REGISTER_KERNEL(
+    yolo_loss_grad, CPU, ALL_LAYOUT, phi::YoloLossGradKernel, float, double) {}
diff --git a/paddle/phi/kernels/cpu/yolov3_loss_kernel.cc b/paddle/phi/kernels/cpu/yolo_loss_kernel.cc
similarity index 93%
rename from paddle/phi/kernels/cpu/yolov3_loss_kernel.cc
rename to paddle/phi/kernels/cpu/yolo_loss_kernel.cc
index 75b2e3c5c4..b32d7ee596 100644
--- a/paddle/phi/kernels/cpu/yolov3_loss_kernel.cc
+++ b/paddle/phi/kernels/cpu/yolo_loss_kernel.cc
@@ -12,14 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/yolov3_loss_kernel.h"
+#include "paddle/phi/kernels/yolo_loss_kernel.h"
 
 #include <algorithm>
 #include <vector>
 
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/cpu/yolov3_loss_functor.h"
+#include "paddle/phi/kernels/cpu/yolo_loss_functor.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace phi {
@@ -178,21 +178,21 @@ static void inline GtValid(bool* valid,
 }
 
 template <typename T, typename Context>
-void Yolov3LossKernel(const Context& dev_ctx,
-                      const DenseTensor& x,
-                      const DenseTensor& gt_box,
-                      const DenseTensor& gt_label,
-                      const paddle::optional<DenseTensor>& gt_score,
-                      const std::vector<int>& anchors,
-                      const std::vector<int>& anchor_mask,
-                      int class_num,
-                      float ignore_thresh,
-                      int downsample_ratio,
-                      bool use_label_smooth,
-                      float scale_x_y,
-                      DenseTensor* loss,
-                      DenseTensor* objectness_mask,
-                      DenseTensor* gt_match_mask) {
+void YoloLossKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& gt_box,
+                    const DenseTensor& gt_label,
+                    const paddle::optional<DenseTensor>& gt_score,
+                    const std::vector<int>& anchors,
+                    const std::vector<int>& anchor_mask,
+                    int class_num,
+                    float ignore_thresh,
+                    int downsample_ratio,
+                    bool use_label_smooth,
+                    float scale_x_y,
+                    DenseTensor* loss,
+                    DenseTensor* objectness_mask,
+                    DenseTensor* gt_match_mask) {
   auto* input = &x;
   auto objness_mask = objectness_mask;
   float scale = scale_x_y;
@@ -371,4 +371,4 @@ void Yolov3LossKernel(const Context& dev_ctx,
 }  // namespace phi
 
 PD_REGISTER_KERNEL(
-    yolov3_loss, CPU, ALL_LAYOUT, phi::Yolov3LossKernel, float, double) {}
+    yolo_loss, CPU, ALL_LAYOUT, phi::YoloLossKernel, float, double) {}
diff --git a/paddle/phi/kernels/gpu/lstsq_kernel.cu b/paddle/phi/kernels/gpu/lstsq_kernel.cu
index adb0ca09d8..0e59dbe9df 100644
--- a/paddle/phi/kernels/gpu/lstsq_kernel.cu
+++ b/paddle/phi/kernels/gpu/lstsq_kernel.cu
@@ -23,7 +23,7 @@
 #include "paddle/phi/kernels/funcs/slice.h"
 #include "paddle/phi/kernels/impl/lstsq_kernel_impl.h"
 #include "paddle/phi/kernels/impl/qr_kernel_impl.h"
-#include "paddle/phi/kernels/impl/tril_triu_kernel_impl.h"
+#include "paddle/phi/kernels/impl/tril_kernel_impl.h"
 #include "paddle/phi/kernels/lstsq_kernel.h"
 #include "paddle/phi/kernels/matmul_kernel.h"
 #include "paddle/phi/kernels/transpose_kernel.h"
@@ -110,7 +110,7 @@ void LstsqKernel(const Context& dev_ctx,
     DenseTensor* res_r = new DenseTensor();
     res_r->Resize(phi::make_ddim({batch_count, min_mn, min_mn}));
     dev_ctx.template Alloc<T>(res_r);
-    phi::TrilTriuKernel<T>(dev_ctx, slice_r, 0, false, res_r);
+    phi::TrilKernel<T>(dev_ctx, slice_r, 0, false, res_r);
 
     DenseTensor trans_y = phi::TransposeLast2Dim<T>(dev_ctx, tmp_y);
     DenseTensor slice_y =
@@ -135,7 +135,7 @@ void LstsqKernel(const Context& dev_ctx,
     DenseTensor* res_r = new DenseTensor();
     res_r->Resize(phi::make_ddim({batch_count, min_mn, min_mn}));
     dev_ctx.template Alloc<T>(res_r);
-    phi::TrilTriuKernel<T>(dev_ctx, slice_r, 0, false, res_r);
+    phi::TrilKernel<T>(dev_ctx, slice_r, 0, false, res_r);
 
     phi::TriangularSolveKernel<T, Context>(
         dev_ctx, *res_r, *new_y, true, true, false, solution);
diff --git a/paddle/phi/kernels/gpu/where_index_kernel.cu b/paddle/phi/kernels/gpu/nonzero_kernel.cu
similarity index 90%
rename from paddle/phi/kernels/gpu/where_index_kernel.cu
rename to paddle/phi/kernels/gpu/nonzero_kernel.cu
index c16859c52b..11139c7d65 100644
--- a/paddle/phi/kernels/gpu/where_index_kernel.cu
+++ b/paddle/phi/kernels/gpu/nonzero_kernel.cu
@@ -25,7 +25,7 @@ namespace cub = hipcub;
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/funcs/select_impl.cu.h"
-#include "paddle/phi/kernels/where_index_kernel.h"
+#include "paddle/phi/kernels/nonzero_kernel.h"
 
 namespace phi {
 template <typename MaskT, typename IndexT, typename OutT>
@@ -62,9 +62,9 @@ struct IndexFunctor {
 };
 
 template <typename T, typename Context>
-void WhereIndexKernel(const Context &dev_ctx,
-                      const DenseTensor &condition,
-                      DenseTensor *out) {
+void NonZeroKernel(const Context &dev_ctx,
+                   const DenseTensor &condition,
+                   DenseTensor *out) {
   DenseTensor in_data;
   auto dims = condition.dims();
   using Functor = IndexFunctor<T, int64_t, int64_t>;
@@ -74,10 +74,10 @@ void WhereIndexKernel(const Context &dev_ctx,
 }
 }  // namespace phi
 
-PD_REGISTER_KERNEL(where_index,
+PD_REGISTER_KERNEL(nonzero,
                    GPU,
                    ALL_LAYOUT,
-                   phi::WhereIndexKernel,
+                   phi::NonZeroKernel,
                    int64_t,
                    int,
                    int16_t,
diff --git a/paddle/phi/kernels/gpu/reduce_prod_grad_kernel.cu b/paddle/phi/kernels/gpu/prod_grad_kernel.cu
similarity index 84%
rename from paddle/phi/kernels/gpu/reduce_prod_grad_kernel.cu
rename to paddle/phi/kernels/gpu/prod_grad_kernel.cu
index 08444cf95d..301cc46b0b 100644
--- a/paddle/phi/kernels/gpu/reduce_prod_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/prod_grad_kernel.cu
@@ -12,16 +12,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/reduce_prod_grad_kernel.h"
+#include "paddle/phi/kernels/prod_grad_kernel.h"
 
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h"
+#include "paddle/phi/kernels/impl/prod_grad_kernel_impl.h"
 
 PD_REGISTER_KERNEL(prod_grad,
                    GPU,
                    ALL_LAYOUT,
-                   phi::ReduceProdGradKernel,
+                   phi::ProdGradKernel,
                    float,
                    double,
                    int,
diff --git a/paddle/phi/kernels/gpu/qr_kernel.cu b/paddle/phi/kernels/gpu/qr_kernel.cu
index 99752ac486..697cf952c1 100644
--- a/paddle/phi/kernels/gpu/qr_kernel.cu
+++ b/paddle/phi/kernels/gpu/qr_kernel.cu
@@ -31,7 +31,7 @@
 #include "paddle/phi/kernels/qr_kernel.h"
 #include "paddle/phi/kernels/slice_kernel.h"
 #include "paddle/phi/kernels/transpose_kernel.h"
-#include "paddle/phi/kernels/tril_triu_kernel.h"
+#include "paddle/phi/kernels/tril_kernel.h"
 
 namespace phi {
 
@@ -103,12 +103,12 @@ void QrKernel(const Context& ctx,
     auto trans_qr = TransposeLast2Dim<T, Context>(ctx, qr);
     auto sliced_qr = SliceKernel<T, Context>(
         ctx, trans_qr, {trans_qr.dims().size() - 2}, {0}, {min_mn}, {1}, {});
-    auto tmp_r = TrilTriu<T, Context>(ctx, sliced_qr, 0, false);
+    auto tmp_r = Tril<T, Context>(ctx, sliced_qr, 0, false);
     // Transpose 'tmp_r' to retore the original row-major order
     phi::Copy(ctx, tmp_r, r->place(), false, r);
   } else {
     auto trans_qr = TransposeLast2Dim<T, Context>(ctx, qr);
-    auto tmp_r = TrilTriu<T, Context>(ctx, trans_qr, 0, false);
+    auto tmp_r = Tril<T, Context>(ctx, trans_qr, 0, false);
     // Transpose 'tmp_r' to retore the original row-major order
     phi::Copy(ctx, tmp_r, r->place(), false, r);
   }
diff --git a/paddle/phi/kernels/gpu/tril_triu_kernel.cu b/paddle/phi/kernels/gpu/tril_grad_kernel.cu
similarity index 88%
rename from paddle/phi/kernels/gpu/tril_triu_kernel.cu
rename to paddle/phi/kernels/gpu/tril_grad_kernel.cu
index 65dcca7058..5bda0e54b3 100644
--- a/paddle/phi/kernels/gpu/tril_triu_kernel.cu
+++ b/paddle/phi/kernels/gpu/tril_grad_kernel.cu
@@ -14,12 +14,12 @@
 
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/impl/tril_triu_kernel_impl.h"
+#include "paddle/phi/kernels/impl/tril_grad_kernel_impl.h"
 
-PD_REGISTER_KERNEL(tril_triu,
+PD_REGISTER_KERNEL(tril_grad,
                    GPU,
                    ALL_LAYOUT,
-                   phi::TrilTriuKernel,
+                   phi::TrilGradKernel,
                    bool,
                    float,
                    double,
diff --git a/paddle/phi/kernels/gpu/tril_triu_grad_kernel.cu b/paddle/phi/kernels/gpu/tril_kernel.cu
similarity index 86%
rename from paddle/phi/kernels/gpu/tril_triu_grad_kernel.cu
rename to paddle/phi/kernels/gpu/tril_kernel.cu
index 3271b38ae8..c50b7c513f 100644
--- a/paddle/phi/kernels/gpu/tril_triu_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/tril_kernel.cu
@@ -14,12 +14,12 @@
 
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/impl/tril_triu_grad_kernel_impl.h"
+#include "paddle/phi/kernels/impl/tril_kernel_impl.h"
 
-PD_REGISTER_KERNEL(tril_triu_grad,
+PD_REGISTER_KERNEL(tril,
                    GPU,
                    ALL_LAYOUT,
-                   phi::TrilTriuGradKernel,
+                   phi::TrilKernel,
                    bool,
                    float,
                    double,
diff --git a/paddle/phi/kernels/gpu/uniform_random_inplace_grad_kernel.cu b/paddle/phi/kernels/gpu/uniform_inplace_grad_kernel.cu
similarity index 61%
rename from paddle/phi/kernels/gpu/uniform_random_inplace_grad_kernel.cu
rename to paddle/phi/kernels/gpu/uniform_inplace_grad_kernel.cu
index 6c6f525a8d..61efe88076 100644
--- a/paddle/phi/kernels/gpu/uniform_random_inplace_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/uniform_inplace_grad_kernel.cu
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/phi/kernels/uniform_random_inplace_grad_kernel.h"
+#include "paddle/phi/kernels/uniform_inplace_grad_kernel.h"
 
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/full_kernel.h"
@@ -20,15 +20,15 @@ limitations under the License. */
 namespace phi {
 
 template <typename T, typename Context>
-void UniformRandomInplaceGradKernel(const Context& ctx,
-                                    const DenseTensor& out_grad,
-                                    float min,
-                                    float max,
-                                    int seed,
-                                    int diag_num,
-                                    int diag_step,
-                                    float diag_val,
-                                    DenseTensor* x_grad) {
+void UniformInplaceGradKernel(const Context& ctx,
+                              const DenseTensor& out_grad,
+                              float min,
+                              float max,
+                              int seed,
+                              int diag_num,
+                              int diag_step,
+                              float diag_val,
+                              DenseTensor* x_grad) {
   auto dims = vectorize(x_grad->dims());
   float value = static_cast<float>(0.0f);
   phi::FullKernel<T>(ctx, dims, value, phi::DataType::UNDEFINED, x_grad);
@@ -36,9 +36,9 @@ void UniformRandomInplaceGradKernel(const Context& ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(uniform_random_inplace_grad,
+PD_REGISTER_KERNEL(uniform_inplace_grad,
                    GPU,
                    ALL_LAYOUT,
-                   phi::UniformRandomInplaceGradKernel,
+                   phi::UniformInplaceGradKernel,
                    float,
                    double) {}
diff --git a/paddle/phi/kernels/gpu/uniform_random_inplace_kernel.cu b/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu
similarity index 79%
rename from paddle/phi/kernels/gpu/uniform_random_inplace_kernel.cu
rename to paddle/phi/kernels/gpu/uniform_inplace_kernel.cu
index d96f582b19..29bc2f4de5 100644
--- a/paddle/phi/kernels/gpu/uniform_random_inplace_kernel.cu
+++ b/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/phi/kernels/uniform_random_inplace_kernel.h"
+#include "paddle/phi/kernels/uniform_inplace_kernel.h"
 
 #include <thrust/random.h>
 
@@ -54,15 +54,15 @@ struct UniformGenerator {
 };
 
 template <typename T, typename Context>
-void UniformRandomInplaceKernel(const Context& ctx,
-                                const DenseTensor& x,
-                                float min,
-                                float max,
-                                int seed,
-                                int diag_num,
-                                int diag_step,
-                                float diag_val,
-                                DenseTensor* out) {
+void UniformInplaceKernel(const Context& ctx,
+                          const DenseTensor& x,
+                          float min,
+                          float max,
+                          int seed,
+                          int diag_num,
+                          int diag_step,
+                          float diag_val,
+                          DenseTensor* out) {
   ctx.template Alloc<T>(out);
   if (seed == 0) {
     // Use global Generator seed
@@ -80,9 +80,9 @@ void UniformRandomInplaceKernel(const Context& ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(uniform_random_inplace,
+PD_REGISTER_KERNEL(uniform_inplace,
                    GPU,
                    ALL_LAYOUT,
-                   phi::UniformRandomInplaceKernel,
+                   phi::UniformInplaceKernel,
                    float,
                    double) {}
diff --git a/paddle/phi/kernels/gpu/uniform_random_kernel.cu b/paddle/phi/kernels/gpu/uniform_kernel.cu
similarity index 81%
rename from paddle/phi/kernels/gpu/uniform_random_kernel.cu
rename to paddle/phi/kernels/gpu/uniform_kernel.cu
index 458239814b..277dadabea 100644
--- a/paddle/phi/kernels/gpu/uniform_random_kernel.cu
+++ b/paddle/phi/kernels/gpu/uniform_kernel.cu
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/uniform_random_kernel.h"
+#include "paddle/phi/kernels/uniform_kernel.h"
 
 #include <thrust/random.h>
 
@@ -54,16 +54,16 @@ struct UniformGenerator {
 };
 
 template <typename T, typename Context>
-void UniformRandomRawKernel(const Context& dev_ctx,
-                            const IntArray& shape,
-                            DataType dtype,
-                            const Scalar& min,
-                            const Scalar& max,
-                            int seed,
-                            int diag_num,
-                            int diag_step,
-                            float diag_val,
-                            DenseTensor* out) {
+void UniformRawKernel(const Context& dev_ctx,
+                      const IntArray& shape,
+                      DataType dtype,
+                      const Scalar& min,
+                      const Scalar& max,
+                      int seed,
+                      int diag_num,
+                      int diag_step,
+                      float diag_val,
+                      DenseTensor* out) {
   out->Resize(phi::make_ddim(shape.GetData()));
   dev_ctx.template Alloc<T>(out);
   if (seed == 0) {
@@ -86,10 +86,10 @@ void UniformRandomRawKernel(const Context& dev_ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(uniform_random_raw,
+PD_REGISTER_KERNEL(uniform_raw,
                    GPU,
                    ALL_LAYOUT,
-                   phi::UniformRandomRawKernel,
+                   phi::UniformRawKernel,
                    float,
                    double,
                    phi::dtype::float16) {}
diff --git a/paddle/phi/kernels/hierarchical_sigmoid_grad_kernel.h b/paddle/phi/kernels/hierarchical_sigmoid_grad_kernel.h
deleted file mode 100644
index c0da8faadd..0000000000
--- a/paddle/phi/kernels/hierarchical_sigmoid_grad_kernel.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "paddle/phi/core/dense_tensor.h"
-
-namespace phi {
-
-template <typename T, typename Context>
-void HierarchicalSigmoidGradKernel(const Context& ctx,
-                                   const DenseTensor& x,
-                                   const DenseTensor& w,
-                                   const DenseTensor& label,
-                                   const paddle::optional<DenseTensor>& path,
-                                   const paddle::optional<DenseTensor>& code,
-                                   const paddle::optional<DenseTensor>& bias,
-                                   const DenseTensor& pre_out,
-                                   const DenseTensor& out_grad,
-                                   int num_classes,
-                                   bool remote_prefetch,
-                                   int trainer_id,
-                                   const std::vector<int64_t>& height_sections,
-                                   const std::vector<std::string>& epmap,
-                                   const std::vector<std::string>& table_names,
-                                   bool is_sparse,
-                                   DenseTensor* x_grad,
-                                   DenseTensor* w_grad,
-                                   DenseTensor* bias_grad);
-
-}  // namespace phi
diff --git a/paddle/phi/kernels/hierarchical_sigmoid_kernel.h b/paddle/phi/kernels/hierarchical_sigmoid_kernel.h
deleted file mode 100644
index e32306b645..0000000000
--- a/paddle/phi/kernels/hierarchical_sigmoid_kernel.h
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "paddle/phi/core/dense_tensor.h"
-
-namespace phi {
-
-template <typename T, typename Context>
-void HierarchicalSigmoidKernel(const Context& ctx,
-                               const DenseTensor& x,
-                               const DenseTensor& w,
-                               const DenseTensor& label,
-                               const paddle::optional<DenseTensor>& path,
-                               const paddle::optional<DenseTensor>& code,
-                               const paddle::optional<DenseTensor>& bias,
-                               int num_classes,
-                               bool remote_prefetch,
-                               int trainer_id,
-                               const std::vector<int64_t>& height_sections,
-                               const std::vector<std::string>& epmap,
-                               const std::vector<std::string>& table_names,
-                               bool is_sparse,
-                               DenseTensor* out,
-                               DenseTensor* pre_out,
-                               DenseTensor* w_out);
-
-}  // namespace phi
diff --git a/paddle/phi/kernels/hsigmoid_loss_grad_kernel.h b/paddle/phi/kernels/hsigmoid_loss_grad_kernel.h
new file mode 100644
index 0000000000..e31d429107
--- /dev/null
+++ b/paddle/phi/kernels/hsigmoid_loss_grad_kernel.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+// Declares the hsigmoid_loss gradient kernel. This declaration supersedes
+// HierarchicalSigmoidGradKernel from hierarchical_sigmoid_grad_kernel.h;
+// only the function name changed, the parameter list is the same.
+// `path`, `code` and `bias` are optional inputs. The trailing DenseTensor*
+// parameters (x_grad, w_grad, bias_grad) are presumably the outputs, per
+// the usual phi kernel convention -- confirm against the definition.
+// NOTE(review): std::string, std::vector and paddle::optional are used here
+// but only dense_tensor.h is included; this relies on transitive includes.
+template <typename T, typename Context>
+void HSigmoidLossGradKernel(const Context& ctx,
+                            const DenseTensor& x,
+                            const DenseTensor& w,
+                            const DenseTensor& label,
+                            const paddle::optional<DenseTensor>& path,
+                            const paddle::optional<DenseTensor>& code,
+                            const paddle::optional<DenseTensor>& bias,
+                            const DenseTensor& pre_out,
+                            const DenseTensor& out_grad,
+                            int num_classes,
+                            bool remote_prefetch,
+                            int trainer_id,
+                            const std::vector<int64_t>& height_sections,
+                            const std::vector<std::string>& epmap,
+                            const std::vector<std::string>& table_names,
+                            bool is_sparse,
+                            DenseTensor* x_grad,
+                            DenseTensor* w_grad,
+                            DenseTensor* bias_grad);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/hsigmoid_loss_kernel.h b/paddle/phi/kernels/hsigmoid_loss_kernel.h
new file mode 100644
index 0000000000..c8fb3ca77f
--- /dev/null
+++ b/paddle/phi/kernels/hsigmoid_loss_kernel.h
@@ -0,0 +1,48 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+// Declares the hsigmoid_loss forward kernel. This declaration supersedes
+// HierarchicalSigmoidKernel from hierarchical_sigmoid_kernel.h; only the
+// function name changed, the parameter list is the same.
+// `path`, `code` and `bias` are optional inputs. The trailing DenseTensor*
+// parameters (out, pre_out, w_out) are presumably the outputs, per the
+// usual phi kernel convention -- confirm against the definition.
+// NOTE(review): std::string, std::vector and paddle::optional are used here
+// but only dense_tensor.h is included; this relies on transitive includes.
+template <typename T, typename Context>
+void HSigmoidLossKernel(const Context& ctx,
+                        const DenseTensor& x,
+                        const DenseTensor& w,
+                        const DenseTensor& label,
+                        const paddle::optional<DenseTensor>& path,
+                        const paddle::optional<DenseTensor>& code,
+                        const paddle::optional<DenseTensor>& bias,
+                        int num_classes,
+                        bool remote_prefetch,
+                        int trainer_id,
+                        const std::vector<int64_t>& height_sections,
+                        const std::vector<std::string>& epmap,
+                        const std::vector<std::string>& table_names,
+                        bool is_sparse,
+                        DenseTensor* out,
+                        DenseTensor* pre_out,
+                        DenseTensor* w_out);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h b/paddle/phi/kernels/impl/prod_grad_kernel_impl.h
similarity index 69%
rename from paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h
rename to paddle/phi/kernels/impl/prod_grad_kernel_impl.h
index a6f92543cc..13f517c072 100644
--- a/paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/prod_grad_kernel_impl.h
@@ -17,19 +17,22 @@
 #include "paddle/phi/common/int_array.h"
 #include "paddle/phi/kernels/funcs/reduce_functor.h"
 #include "paddle/phi/kernels/impl/reduce_grad.h"
-#include "paddle/phi/kernels/reduce_prod_grad_kernel.h"
+#include "paddle/phi/kernels/prod_grad_kernel.h"
 
 namespace phi {
 
+// Gradient kernel for prod: forwards all arguments to ReduceGradKernel
+// instantiated with funcs::ProdGradFunctor, passing `dims` as the value
+// returned by IntArray::GetData().
 template <typename T, typename Context>
-void ReduceProdGradKernel(const Context& dev_ctx,
-                          const DenseTensor& x,
-                          const DenseTensor& out,
-                          const DenseTensor& out_grad,
-                          const IntArray& dims,
-                          bool keep_dim,
-                          bool reduce_all,
-                          DenseTensor* x_grad) {
+void ProdGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out,
+                    const DenseTensor& out_grad,
+                    const IntArray& dims,
+                    bool keep_dim,
+                    bool reduce_all,
+                    DenseTensor* x_grad) {
   ReduceGradKernel<Context, T, funcs::ProdGradFunctor>(
       dev_ctx, x, out, out_grad, dims.GetData(), keep_dim, reduce_all, x_grad);
 }
diff --git a/paddle/phi/kernels/impl/qr_grad_kernel_impl.h b/paddle/phi/kernels/impl/qr_grad_kernel_impl.h
index 5c04d9bb90..5ad59f757a 100644
--- a/paddle/phi/kernels/impl/qr_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/qr_grad_kernel_impl.h
@@ -29,7 +29,7 @@
 #include "paddle/phi/kernels/slice_kernel.h"
 #include "paddle/phi/kernels/transpose_kernel.h"
 #include "paddle/phi/kernels/triangular_solve_kernel.h"
-#include "paddle/phi/kernels/tril_triu_kernel.h"
+#include "paddle/phi/kernels/tril_kernel.h"
 
 namespace phi {
 
@@ -116,8 +116,8 @@ void QrGradKernel(const Context& ctx,
     DenseTensor M_tmp1 = Subtract<T, Context>(ctx, R_term, Q_term);
 
     // Compute M = (tril(M) + tril(M).mH()) * 0.5 Identity
-    DenseTensor M_tril_0 = TrilTriu<T, Context>(ctx, M_tmp1, 0, true);
-    DenseTensor M_tril_1 = TrilTriu<T, Context>(ctx, M_tmp1, -1, true);
+    DenseTensor M_tril_0 = Tril<T, Context>(ctx, M_tmp1, 0, true);
+    DenseTensor M_tril_1 = Tril<T, Context>(ctx, M_tmp1, -1, true);
     DenseTensor M = Add<T, Context>(
         ctx, M_tril_0, TransposeLast2Dim<T, Context>(ctx, M_tril_1));
 
diff --git a/paddle/phi/kernels/impl/tril_triu_grad_kernel_impl.h b/paddle/phi/kernels/impl/tril_grad_kernel_impl.h
similarity index 82%
rename from paddle/phi/kernels/impl/tril_triu_grad_kernel_impl.h
rename to paddle/phi/kernels/impl/tril_grad_kernel_impl.h
index 91dbde04ac..3f72d34a95 100644
--- a/paddle/phi/kernels/impl/tril_triu_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/tril_grad_kernel_impl.h
@@ -16,16 +16,16 @@
 
 #include "paddle/phi/kernels/funcs/for_range.h"
 #include "paddle/phi/kernels/funcs/tril_triu_compute.h"
-#include "paddle/phi/kernels/tril_triu_grad_kernel.h"
+#include "paddle/phi/kernels/tril_grad_kernel.h"
 
 namespace phi {
 
 template <typename T, typename Context>
-void TrilTriuGradKernel(const Context& ctx,
-                        const DenseTensor& out_grad,
-                        int diagonal,
-                        bool lower,
-                        DenseTensor* x_grad) {
+void TrilGradKernel(const Context& ctx,
+                    const DenseTensor& out_grad,
+                    int diagonal,
+                    bool lower,
+                    DenseTensor* x_grad) {
   const auto* dout_data = out_grad.data<T>();
   auto* dx_data = ctx.template Alloc<T>(x_grad);
 
diff --git a/paddle/phi/kernels/impl/tril_triu_kernel_impl.h b/paddle/phi/kernels/impl/tril_kernel_impl.h
similarity index 83%
rename from paddle/phi/kernels/impl/tril_triu_kernel_impl.h
rename to paddle/phi/kernels/impl/tril_kernel_impl.h
index 24c032893c..8e93e87fbc 100644
--- a/paddle/phi/kernels/impl/tril_triu_kernel_impl.h
+++ b/paddle/phi/kernels/impl/tril_kernel_impl.h
@@ -16,16 +16,16 @@
 
 #include "paddle/phi/kernels/funcs/for_range.h"
 #include "paddle/phi/kernels/funcs/tril_triu_compute.h"
-#include "paddle/phi/kernels/tril_triu_kernel.h"
+#include "paddle/phi/kernels/tril_kernel.h"
 
 namespace phi {
 
 template <typename T, typename Context>
-void TrilTriuKernel(const Context& ctx,
-                    const DenseTensor& x,
-                    int diagonal,
-                    bool lower,
-                    DenseTensor* out) {
+void TrilKernel(const Context& ctx,
+                const DenseTensor& x,
+                int diagonal,
+                bool lower,
+                DenseTensor* out) {
   const auto* x_data = x.data<T>();
   auto* out_data = ctx.template Alloc<T>(out);
 
diff --git a/paddle/phi/kernels/kps/reduce_prod_kernel.cu b/paddle/phi/kernels/kps/prod_kernel.cu
similarity index 96%
rename from paddle/phi/kernels/kps/reduce_prod_kernel.cu
rename to paddle/phi/kernels/kps/prod_kernel.cu
index f5b52937e3..326a351f6d 100644
--- a/paddle/phi/kernels/kps/reduce_prod_kernel.cu
+++ b/paddle/phi/kernels/kps/prod_kernel.cu
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/reduce_prod_kernel.h"
+#include "paddle/phi/kernels/prod_kernel.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/gpu/reduce.h"
 
diff --git a/paddle/phi/kernels/where_index_kernel.h b/paddle/phi/kernels/nonzero_kernel.h
similarity index 84%
rename from paddle/phi/kernels/where_index_kernel.h
rename to paddle/phi/kernels/nonzero_kernel.h
index 68b094637c..757ddd7ac3 100644
--- a/paddle/phi/kernels/where_index_kernel.h
+++ b/paddle/phi/kernels/nonzero_kernel.h
@@ -19,8 +19,8 @@
 namespace phi {
 
 template <typename T, typename Context>
-void WhereIndexKernel(const Context& dev_ctx,
-                      const DenseTensor& condition,
-                      DenseTensor* out);
+void NonZeroKernel(const Context& dev_ctx,
+                   const DenseTensor& condition,
+                   DenseTensor* out);
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/reduce_prod_grad_kernel.h b/paddle/phi/kernels/prod_grad_kernel.h
similarity index 68%
rename from paddle/phi/kernels/reduce_prod_grad_kernel.h
rename to paddle/phi/kernels/prod_grad_kernel.h
index fb773f167f..5b6f8807e9 100644
--- a/paddle/phi/kernels/reduce_prod_grad_kernel.h
+++ b/paddle/phi/kernels/prod_grad_kernel.h
@@ -20,12 +20,12 @@
 namespace phi {
 
 template <typename T, typename Context>
-void ReduceProdGradKernel(const Context& dev_ctx,
-                          const DenseTensor& x,
-                          const DenseTensor& out,
-                          const DenseTensor& out_grad,
-                          const IntArray& dims,
-                          bool keep_dim,
-                          bool reduce_all,
-                          DenseTensor* x_grad);
+void ProdGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out,
+                    const DenseTensor& out_grad,
+                    const IntArray& dims,
+                    bool keep_dim,
+                    bool reduce_all,
+                    DenseTensor* x_grad);
 }  // namespace phi
diff --git a/paddle/phi/kernels/reduce_prod_kernel.cc b/paddle/phi/kernels/prod_kernel.cc
similarity index 96%
rename from paddle/phi/kernels/reduce_prod_kernel.cc
rename to paddle/phi/kernels/prod_kernel.cc
index 538c5a5175..532b6fdaa1 100644
--- a/paddle/phi/kernels/reduce_prod_kernel.cc
+++ b/paddle/phi/kernels/prod_kernel.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/reduce_prod_kernel.h"
+#include "paddle/phi/kernels/prod_kernel.h"
 
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/core/kernel_registry.h"
diff --git a/paddle/phi/kernels/reduce_prod_kernel.h b/paddle/phi/kernels/prod_kernel.h
similarity index 100%
rename from paddle/phi/kernels/reduce_prod_kernel.h
rename to paddle/phi/kernels/prod_kernel.h
diff --git a/paddle/phi/kernels/selected_rows/hierarchical_sigmoid_grad_kernel.cc b/paddle/phi/kernels/selected_rows/hierarchical_sigmoid_grad_kernel.cc
deleted file mode 100644
index 616786d210..0000000000
--- a/paddle/phi/kernels/selected_rows/hierarchical_sigmoid_grad_kernel.cc
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/phi/kernels/selected_rows/hierarchical_sigmoid_grad_kernel.h"
-
-#include "paddle/fluid/framework/mixed_vector.h"
-#include "paddle/phi/backends/cpu/cpu_context.h"
-#include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/cpu/hierarchical_sigmoid_grad.h"
-
-namespace phi {
-namespace sr {
-
-static std::vector<int64_t> PathToRows(const DenseTensor& path) {
-  std::set<int64_t> rows;
-  const int64_t* paths = path.data<int64_t>();
-  for (int64_t i = 0; i < path.numel(); ++i) {
-    int64_t row = paths[i];
-    if (row < 0) {
-      continue;
-    }
-    rows.emplace(row);
-  }
-  return std::vector<int64_t>(rows.begin(), rows.end());
-}
-
-template <typename T, typename Context>
-void HierarchicalSigmoidGradKernel(const Context& ctx,
-                                   const DenseTensor& x,
-                                   const DenseTensor& w,
-                                   const DenseTensor& label,
-                                   const paddle::optional<DenseTensor>& path,
-                                   const paddle::optional<DenseTensor>& code,
-                                   const paddle::optional<DenseTensor>& bias,
-                                   const DenseTensor& pre_out,
-                                   const DenseTensor& out_grad,
-                                   int num_classes,
-                                   bool remote_prefetch,
-                                   int trainer_id,
-                                   const std::vector<int64_t>& height_sections,
-                                   const std::vector<std::string>& epmap,
-                                   const std::vector<std::string>& table_names,
-                                   bool is_sparse,
-                                   DenseTensor* x_grad,
-                                   SelectedRows* w_grad,
-                                   DenseTensor* bias_grad) {
-  PADDLE_ENFORCE_NOT_NULL(
-      path.get_ptr(),
-      errors::NotFound("Custom tree must be set for sparse mode!"));
-  paddle::framework::Vector<int64_t> real_rows = PathToRows(*path);
-  w_grad->set_rows(real_rows);
-  // Build a map of id -> row_index to speed up finding the index of one id
-  w_grad->set_height(w.dims()[0]);
-  auto* w_grad_value = w_grad->mutable_value();
-  phi::DDim temp_dim(w.dims());
-  temp_dim[0] = real_rows.size();
-  w_grad_value->Resize(temp_dim);
-  phi::HierarchicalSigmoidGradKernelImpl<T>(ctx,
-                                            x,
-                                            w,
-                                            label,
-                                            path,
-                                            code,
-                                            bias,
-                                            pre_out,
-                                            out_grad,
-                                            num_classes,
-                                            remote_prefetch,
-                                            trainer_id,
-                                            height_sections,
-                                            epmap,
-                                            table_names,
-                                            is_sparse,
-                                            x_grad,
-                                            w_grad_value,
-                                            bias_grad,
-                                            w_grad);
-}
-
-}  // namespace sr
-}  // namespace phi
-
-PD_REGISTER_KERNEL(hierarchical_sigmoid_grad_sr,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::sr::HierarchicalSigmoidGradKernel,
-                   float,
-                   double) {}
diff --git a/paddle/phi/kernels/selected_rows/hierarchical_sigmoid_grad_kernel.h b/paddle/phi/kernels/selected_rows/hierarchical_sigmoid_grad_kernel.h
deleted file mode 100644
index aca355f515..0000000000
--- a/paddle/phi/kernels/selected_rows/hierarchical_sigmoid_grad_kernel.h
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "paddle/phi/core/dense_tensor.h"
-#include "paddle/phi/core/selected_rows.h"
-
-namespace phi {
-namespace sr {
-
-template <typename T, typename Context>
-void HierarchicalSigmoidGradKernel(const Context& ctx,
-                                   const DenseTensor& x,
-                                   const DenseTensor& w,
-                                   const DenseTensor& label,
-                                   const paddle::optional<DenseTensor>& path,
-                                   const paddle::optional<DenseTensor>& code,
-                                   const paddle::optional<DenseTensor>& bias,
-                                   const DenseTensor& pre_out,
-                                   const DenseTensor& out_grad,
-                                   int num_classes,
-                                   bool remote_prefetch,
-                                   int trainer_id,
-                                   const std::vector<int64_t>& height_sections,
-                                   const std::vector<std::string>& epmap,
-                                   const std::vector<std::string>& table_names,
-                                   bool is_sparse,
-                                   DenseTensor* x_grad,
-                                   SelectedRows* w_grad,
-                                   DenseTensor* bias_grad);
-
-}  // namespace sr
-}  // namespace phi
diff --git a/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc
new file mode 100644
index 0000000000..1fedcb14f5
--- /dev/null
+++ b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc
@@ -0,0 +1,99 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h"
+
+#include "paddle/fluid/framework/mixed_vector.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/cpu/hsigmoid_loss_grad.h"
+
+namespace phi {
+namespace sr {
+
+static std::vector<int64_t> PathToRows(const DenseTensor& path) {
+  std::set<int64_t> rows;
+  const int64_t* paths = path.data<int64_t>();
+  for (int64_t i = 0; i < path.numel(); ++i) {
+    int64_t row = paths[i];
+    if (row < 0) {
+      continue;
+    }
+    rows.emplace(row);
+  }
+  return std::vector<int64_t>(rows.begin(), rows.end());
+}
+
+template <typename T, typename Context>
+void HSigmoidLossGradKernel(const Context& ctx,
+                            const DenseTensor& x,
+                            const DenseTensor& w,
+                            const DenseTensor& label,
+                            const paddle::optional<DenseTensor>& path,
+                            const paddle::optional<DenseTensor>& code,
+                            const paddle::optional<DenseTensor>& bias,
+                            const DenseTensor& pre_out,
+                            const DenseTensor& out_grad,
+                            int num_classes,
+                            bool remote_prefetch,
+                            int trainer_id,
+                            const std::vector<int64_t>& height_sections,
+                            const std::vector<std::string>& epmap,
+                            const std::vector<std::string>& table_names,
+                            bool is_sparse,
+                            DenseTensor* x_grad,
+                            SelectedRows* w_grad,
+                            DenseTensor* bias_grad) {
+  PADDLE_ENFORCE_NOT_NULL(
+      path.get_ptr(),
+      errors::NotFound("Custom tree must be set for sparse mode!"));
+  paddle::framework::Vector<int64_t> real_rows = PathToRows(*path);
+  w_grad->set_rows(real_rows);
+  // Build a map of id -> row_index to speed up finding the index of one id
+  w_grad->set_height(w.dims()[0]);
+  auto* w_grad_value = w_grad->mutable_value();
+  phi::DDim temp_dim(w.dims());
+  temp_dim[0] = real_rows.size();
+  w_grad_value->Resize(temp_dim);
+  phi::HSigmoidLossGradKernelImpl<T>(ctx,
+                                     x,
+                                     w,
+                                     label,
+                                     path,
+                                     code,
+                                     bias,
+                                     pre_out,
+                                     out_grad,
+                                     num_classes,
+                                     remote_prefetch,
+                                     trainer_id,
+                                     height_sections,
+                                     epmap,
+                                     table_names,
+                                     is_sparse,
+                                     x_grad,
+                                     w_grad_value,
+                                     bias_grad,
+                                     w_grad);
+}
+
+}  // namespace sr
+}  // namespace phi
+
+PD_REGISTER_KERNEL(hsigmoid_loss_grad_sr,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::sr::HSigmoidLossGradKernel,
+                   float,
+                   double) {}
diff --git a/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h
new file mode 100644
index 0000000000..fe4ffe2460
--- /dev/null
+++ b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h
@@ -0,0 +1,45 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/selected_rows.h"
+
+namespace phi {
+namespace sr {
+
+template <typename T, typename Context>
+void HSigmoidLossGradKernel(const Context& ctx,
+                            const DenseTensor& x,
+                            const DenseTensor& w,
+                            const DenseTensor& label,
+                            const paddle::optional<DenseTensor>& path,
+                            const paddle::optional<DenseTensor>& code,
+                            const paddle::optional<DenseTensor>& bias,
+                            const DenseTensor& pre_out,
+                            const DenseTensor& out_grad,
+                            int num_classes,
+                            bool remote_prefetch,
+                            int trainer_id,
+                            const std::vector<int64_t>& height_sections,
+                            const std::vector<std::string>& epmap,
+                            const std::vector<std::string>& table_names,
+                            bool is_sparse,
+                            DenseTensor* x_grad,
+                            SelectedRows* w_grad,
+                            DenseTensor* bias_grad);
+
+}  // namespace sr
+}  // namespace phi
diff --git a/paddle/phi/kernels/selected_rows/uniform_kernel.cc b/paddle/phi/kernels/selected_rows/uniform_kernel.cc
new file mode 100644
index 0000000000..73d00aa9a7
--- /dev/null
+++ b/paddle/phi/kernels/selected_rows/uniform_kernel.cc
@@ -0,0 +1,96 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/selected_rows/uniform_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/uniform_kernel.h"
+
+namespace phi {
+namespace sr {
+
+template <typename T, typename Context>
+void UniformRawKernel(const Context& dev_ctx,
+                      const IntArray& shape,
+                      DataType dtype,
+                      const Scalar& min,
+                      const Scalar& max,
+                      int seed,
+                      int diag_num,
+                      int diag_step,
+                      float diag_val,
+                      SelectedRows* out) {
+  phi::UniformRawKernel<T>(dev_ctx,
+                           shape,
+                           dtype,
+                           min,
+                           max,
+                           seed,
+                           diag_num,
+                           diag_step,
+                           diag_val,
+                           out->mutable_value());
+}
+
+template <typename T, typename Context>
+void UniformKernel(const Context& dev_ctx,
+                   const IntArray& shape,
+                   DataType dtype,
+                   const Scalar& min,
+                   const Scalar& max,
+                   int seed,
+                   SelectedRows* out) {
+  phi::UniformKernel<T>(
+      dev_ctx, shape, dtype, min, max, seed, out->mutable_value());
+}
+
+}  // namespace sr
+}  // namespace phi
+
+PD_REGISTER_KERNEL(uniform_raw_sr,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::sr::UniformRawKernel,
+                   float,
+                   double,
+                   phi::dtype::bfloat16) {}
+
+PD_REGISTER_KERNEL(uniform_sr,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::sr::UniformKernel,
+                   float,
+                   double,
+                   phi::dtype::bfloat16) {}
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+
+PD_REGISTER_KERNEL(
+    uniform_raw_sr, GPU, ALL_LAYOUT, phi::sr::UniformRawKernel, float, double) {
+}
+
+PD_REGISTER_KERNEL(
+    uniform_sr, GPU, ALL_LAYOUT, phi::sr::UniformKernel, float, double) {}
+#endif
+
+#if defined(PADDLE_WITH_XPU)
+
+PD_REGISTER_KERNEL(
+    uniform_raw_sr, XPU, ALL_LAYOUT, phi::sr::UniformRawKernel, float) {}
+
+PD_REGISTER_KERNEL(uniform_sr, XPU, ALL_LAYOUT, phi::sr::UniformKernel, float) {
+}
+#endif
diff --git a/paddle/phi/kernels/selected_rows/uniform_random_kernel.h b/paddle/phi/kernels/selected_rows/uniform_kernel.h
similarity index 54%
rename from paddle/phi/kernels/selected_rows/uniform_random_kernel.h
rename to paddle/phi/kernels/selected_rows/uniform_kernel.h
index 237b01532c..dc50175db8 100644
--- a/paddle/phi/kernels/selected_rows/uniform_random_kernel.h
+++ b/paddle/phi/kernels/selected_rows/uniform_kernel.h
@@ -22,25 +22,25 @@ namespace phi {
 namespace sr {
 
 template <typename T, typename Context>
-void UniformRandomRawKernel(const Context& dev_ctx,
-                            const IntArray& shape,
-                            DataType dtype,
-                            const Scalar& min,
-                            const Scalar& max,
-                            int seed,
-                            int diag_num,
-                            int diag_step,
-                            float diag_val,
-                            SelectedRows* out);
+void UniformRawKernel(const Context& dev_ctx,
+                      const IntArray& shape,
+                      DataType dtype,
+                      const Scalar& min,
+                      const Scalar& max,
+                      int seed,
+                      int diag_num,
+                      int diag_step,
+                      float diag_val,
+                      SelectedRows* out);
 
 template <typename T, typename Context>
-void UniformRandomKernel(const Context& dev_ctx,
-                         const IntArray& shape,
-                         DataType dtype,
-                         const Scalar& min,
-                         const Scalar& max,
-                         int seed,
-                         SelectedRows* out);
+void UniformKernel(const Context& dev_ctx,
+                   const IntArray& shape,
+                   DataType dtype,
+                   const Scalar& min,
+                   const Scalar& max,
+                   int seed,
+                   SelectedRows* out);
 
 }  // namespace sr
 }  // namespace phi
diff --git a/paddle/phi/kernels/selected_rows/uniform_random_kernel.cc b/paddle/phi/kernels/selected_rows/uniform_random_kernel.cc
deleted file mode 100644
index d6037da45f..0000000000
--- a/paddle/phi/kernels/selected_rows/uniform_random_kernel.cc
+++ /dev/null
@@ -1,106 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/phi/kernels/selected_rows/uniform_random_kernel.h"
-
-#include "paddle/phi/backends/cpu/cpu_context.h"
-#include "paddle/phi/backends/gpu/gpu_context.h"
-#include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/uniform_random_kernel.h"
-
-namespace phi {
-namespace sr {
-
-template <typename T, typename Context>
-void UniformRandomRawKernel(const Context& dev_ctx,
-                            const IntArray& shape,
-                            DataType dtype,
-                            const Scalar& min,
-                            const Scalar& max,
-                            int seed,
-                            int diag_num,
-                            int diag_step,
-                            float diag_val,
-                            SelectedRows* out) {
-  phi::UniformRandomRawKernel<T>(dev_ctx,
-                                 shape,
-                                 dtype,
-                                 min,
-                                 max,
-                                 seed,
-                                 diag_num,
-                                 diag_step,
-                                 diag_val,
-                                 out->mutable_value());
-}
-
-template <typename T, typename Context>
-void UniformRandomKernel(const Context& dev_ctx,
-                         const IntArray& shape,
-                         DataType dtype,
-                         const Scalar& min,
-                         const Scalar& max,
-                         int seed,
-                         SelectedRows* out) {
-  phi::UniformRandomKernel<T>(
-      dev_ctx, shape, dtype, min, max, seed, out->mutable_value());
-}
-
-}  // namespace sr
-}  // namespace phi
-
-PD_REGISTER_KERNEL(uniform_random_raw_sr,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::sr::UniformRandomRawKernel,
-                   float,
-                   double,
-                   phi::dtype::bfloat16) {}
-
-PD_REGISTER_KERNEL(uniform_random_sr,
-                   CPU,
-                   ALL_LAYOUT,
-                   phi::sr::UniformRandomKernel,
-                   float,
-                   double,
-                   phi::dtype::bfloat16) {}
-
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-
-PD_REGISTER_KERNEL(uniform_random_raw_sr,
-                   GPU,
-                   ALL_LAYOUT,
-                   phi::sr::UniformRandomRawKernel,
-                   float,
-                   double) {}
-
-PD_REGISTER_KERNEL(uniform_random_sr,
-                   GPU,
-                   ALL_LAYOUT,
-                   phi::sr::UniformRandomKernel,
-                   float,
-                   double) {}
-#endif
-
-#if defined(PADDLE_WITH_XPU)
-
-PD_REGISTER_KERNEL(uniform_random_raw_sr,
-                   XPU,
-                   ALL_LAYOUT,
-                   phi::sr::UniformRandomRawKernel,
-                   float) {}
-
-PD_REGISTER_KERNEL(
-    uniform_random_sr, XPU, ALL_LAYOUT, phi::sr::UniformRandomKernel, float) {}
-#endif
diff --git a/paddle/phi/kernels/tril_triu_grad_kernel.h b/paddle/phi/kernels/tril_grad_kernel.h
similarity index 77%
rename from paddle/phi/kernels/tril_triu_grad_kernel.h
rename to paddle/phi/kernels/tril_grad_kernel.h
index 10faf5c48d..7fc5e77363 100644
--- a/paddle/phi/kernels/tril_triu_grad_kernel.h
+++ b/paddle/phi/kernels/tril_grad_kernel.h
@@ -19,10 +19,10 @@
 namespace phi {
 
 template <typename T, typename Context>
-void TrilTriuGradKernel(const Context& ctx,
-                        const DenseTensor& out_grad,
-                        int diagonal,
-                        bool lower,
-                        DenseTensor* x_grad);
+void TrilGradKernel(const Context& ctx,
+                    const DenseTensor& out_grad,
+                    int diagonal,
+                    bool lower,
+                    DenseTensor* x_grad);
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/tril_triu_kernel.h b/paddle/phi/kernels/tril_kernel.h
similarity index 66%
rename from paddle/phi/kernels/tril_triu_kernel.h
rename to paddle/phi/kernels/tril_kernel.h
index 8d4c44c5b3..52154c2b2b 100644
--- a/paddle/phi/kernels/tril_triu_kernel.h
+++ b/paddle/phi/kernels/tril_kernel.h
@@ -20,21 +20,21 @@
 namespace phi {
 
 template <typename T, typename Context>
-void TrilTriuKernel(const Context& ctx,
-                    const DenseTensor& x,
-                    int diagonal,
-                    bool lower,
-                    DenseTensor* out);
+void TrilKernel(const Context& ctx,
+                const DenseTensor& x,
+                int diagonal,
+                bool lower,
+                DenseTensor* out);
 
 template <typename T, typename Context>
-DenseTensor TrilTriu(const Context& ctx,
-                     const DenseTensor& x,
-                     int diagonal,
-                     bool lower) {
+DenseTensor Tril(const Context& ctx,
+                 const DenseTensor& x,
+                 int diagonal,
+                 bool lower) {
   DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
-  TrilTriuInferMeta(x, diagonal, lower, &meta_out);
-  TrilTriuKernel<T, Context>(ctx, x, diagonal, lower, &dense_out);
+  TrilInferMeta(x, diagonal, lower, &meta_out);
+  TrilKernel<T, Context>(ctx, x, diagonal, lower, &dense_out);
   return dense_out;
 }
 
diff --git a/paddle/phi/kernels/uniform_random_inplace_kernel.h b/paddle/phi/kernels/uniform_inplace_grad_kernel.h
similarity index 63%
rename from paddle/phi/kernels/uniform_random_inplace_kernel.h
rename to paddle/phi/kernels/uniform_inplace_grad_kernel.h
index 97a79375af..fd37f3c6f5 100644
--- a/paddle/phi/kernels/uniform_random_inplace_kernel.h
+++ b/paddle/phi/kernels/uniform_inplace_grad_kernel.h
@@ -19,14 +19,14 @@ limitations under the License. */
 namespace phi {
 
 template <typename T, typename Context>
-void UniformRandomInplaceKernel(const Context& ctx,
-                                const DenseTensor& x,
-                                float min,
-                                float max,
-                                int seed,
-                                int diag_num,
-                                int diag_step,
-                                float diag_val,
-                                DenseTensor* out);
+void UniformInplaceGradKernel(const Context& ctx,
+                              const DenseTensor& out_grad,
+                              float min,
+                              float max,
+                              int seed,
+                              int diag_num,
+                              int diag_step,
+                              float diag_val,
+                              DenseTensor* x_grad);
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/uniform_random_inplace_grad_kernel.h b/paddle/phi/kernels/uniform_inplace_kernel.h
similarity index 60%
rename from paddle/phi/kernels/uniform_random_inplace_grad_kernel.h
rename to paddle/phi/kernels/uniform_inplace_kernel.h
index ae74fbe2fd..9bb17b4a33 100644
--- a/paddle/phi/kernels/uniform_random_inplace_grad_kernel.h
+++ b/paddle/phi/kernels/uniform_inplace_kernel.h
@@ -19,14 +19,14 @@ limitations under the License. */
 namespace phi {
 
 template <typename T, typename Context>
-void UniformRandomInplaceGradKernel(const Context& ctx,
-                                    const DenseTensor& out_grad,
-                                    float min,
-                                    float max,
-                                    int seed,
-                                    int diag_num,
-                                    int diag_step,
-                                    float diag_val,
-                                    DenseTensor* x_grad);
+void UniformInplaceKernel(const Context& ctx,
+                          const DenseTensor& x,
+                          float min,
+                          float max,
+                          int seed,
+                          int diag_num,
+                          int diag_step,
+                          float diag_val,
+                          DenseTensor* out);
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/uniform_random_kernel.cc b/paddle/phi/kernels/uniform_kernel.cc
similarity index 68%
rename from paddle/phi/kernels/uniform_random_kernel.cc
rename to paddle/phi/kernels/uniform_kernel.cc
index 6669438cc3..3744fc49d7 100644
--- a/paddle/phi/kernels/uniform_random_kernel.cc
+++ b/paddle/phi/kernels/uniform_kernel.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/uniform_random_kernel.h"
+#include "paddle/phi/kernels/uniform_kernel.h"
 
 #include "paddle/phi/common/int_array.h"
 #include "paddle/phi/common/scalar.h"
@@ -29,38 +29,36 @@
 namespace phi {
 
 template <typename T, typename Context>
-void UniformRandomKernel(const Context& dev_ctx,
-                         const IntArray& shape,
-                         DataType dtype,
-                         const Scalar& min,
-                         const Scalar& max,
-                         int seed,
-                         DenseTensor* out) {
-  UniformRandomRawKernel<T>(
-      dev_ctx, shape, dtype, min, max, seed, 0, 0, 0.0f, out);
+void UniformKernel(const Context& dev_ctx,
+                   const IntArray& shape,
+                   DataType dtype,
+                   const Scalar& min,
+                   const Scalar& max,
+                   int seed,
+                   DenseTensor* out) {
+  UniformRawKernel<T>(dev_ctx, shape, dtype, min, max, seed, 0, 0, 0.0f, out);
 }
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(uniform_random,
+PD_REGISTER_KERNEL(uniform,
                    CPU,
                    ALL_LAYOUT,
-                   phi::UniformRandomKernel,
+                   phi::UniformKernel,
                    float,
                    double,
                    phi::dtype::bfloat16) {}
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-PD_REGISTER_KERNEL(uniform_random,
+PD_REGISTER_KERNEL(uniform,
                    GPU,
                    ALL_LAYOUT,
-                   phi::UniformRandomKernel,
+                   phi::UniformKernel,
                    float,
                    double,
                    phi::dtype::float16) {}
 #endif
 
 #ifdef PADDLE_WITH_XPU
-PD_REGISTER_KERNEL(
-    uniform_random, XPU, ALL_LAYOUT, phi::UniformRandomKernel, float) {}
+PD_REGISTER_KERNEL(uniform, XPU, ALL_LAYOUT, phi::UniformKernel, float) {}
 #endif
diff --git a/paddle/phi/kernels/uniform_random_kernel.h b/paddle/phi/kernels/uniform_kernel.h
similarity index 54%
rename from paddle/phi/kernels/uniform_random_kernel.h
rename to paddle/phi/kernels/uniform_kernel.h
index 1395a4663b..ef19c20b93 100644
--- a/paddle/phi/kernels/uniform_random_kernel.h
+++ b/paddle/phi/kernels/uniform_kernel.h
@@ -22,24 +22,24 @@
 namespace phi {
 
 template <typename T, typename Context>
-void UniformRandomRawKernel(const Context& dev_ctx,
-                            const IntArray& shape,
-                            DataType dtype,
-                            const Scalar& min,
-                            const Scalar& max,
-                            int seed,
-                            int diag_num,
-                            int diag_step,
-                            float diag_val,
-                            DenseTensor* out);
+void UniformRawKernel(const Context& dev_ctx,
+                      const IntArray& shape,
+                      DataType dtype,
+                      const Scalar& min,
+                      const Scalar& max,
+                      int seed,
+                      int diag_num,
+                      int diag_step,
+                      float diag_val,
+                      DenseTensor* out);
 
 template <typename T, typename Context>
-void UniformRandomKernel(const Context& dev_ctx,
-                         const IntArray& shape,
-                         DataType dtype,
-                         const Scalar& min,
-                         const Scalar& max,
-                         int seed,
-                         DenseTensor* out);
+void UniformKernel(const Context& dev_ctx,
+                   const IntArray& shape,
+                   DataType dtype,
+                   const Scalar& min,
+                   const Scalar& max,
+                   int seed,
+                   DenseTensor* out);
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/xpu/where_index_kernel.cc b/paddle/phi/kernels/xpu/nonzero_kernel.cc
similarity index 89%
rename from paddle/phi/kernels/xpu/where_index_kernel.cc
rename to paddle/phi/kernels/xpu/nonzero_kernel.cc
index f6653e57f6..cf936f659f 100644
--- a/paddle/phi/kernels/xpu/where_index_kernel.cc
+++ b/paddle/phi/kernels/xpu/nonzero_kernel.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/where_index_kernel.h"
+#include "paddle/phi/kernels/nonzero_kernel.h"
 
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/device/xpu/xpu_header.h"
@@ -22,9 +22,9 @@
 namespace phi {
 
 template <typename T, typename Context>
-void WhereIndexKernel(const Context& dev_ctx,
-                      const DenseTensor& condition,
-                      DenseTensor* out) {
+void NonZeroKernel(const Context& dev_ctx,
+                   const DenseTensor& condition,
+                   DenseTensor* out) {
   const T* cond_data = condition.data<T>();
   auto numel = condition.numel();
   auto dims = condition.dims();
@@ -69,4 +69,4 @@ void WhereIndexKernel(const Context& dev_ctx,
 }  // namespace phi
 
 PD_REGISTER_KERNEL(
-    where_index, XPU, ALL_LAYOUT, phi::WhereIndexKernel, int, bool, float) {}
+    nonzero, XPU, ALL_LAYOUT, phi::NonZeroKernel, int, bool, float) {}
diff --git a/paddle/phi/kernels/xpu/reduce_prod_kernel.cc b/paddle/phi/kernels/xpu/prod_kernel.cc
similarity index 96%
rename from paddle/phi/kernels/xpu/reduce_prod_kernel.cc
rename to paddle/phi/kernels/xpu/prod_kernel.cc
index c82dd1b5f6..7be48a8bab 100644
--- a/paddle/phi/kernels/xpu/reduce_prod_kernel.cc
+++ b/paddle/phi/kernels/xpu/prod_kernel.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/reduce_prod_kernel.h"
+#include "paddle/phi/kernels/prod_kernel.h"
 
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
diff --git a/paddle/phi/kernels/xpu/tril_triu_grad_kernel.cc b/paddle/phi/kernels/xpu/tril_grad_kernel.cc
similarity index 81%
rename from paddle/phi/kernels/xpu/tril_triu_grad_kernel.cc
rename to paddle/phi/kernels/xpu/tril_grad_kernel.cc
index 964e9c6174..af8dfdd8c0 100644
--- a/paddle/phi/kernels/xpu/tril_triu_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/tril_grad_kernel.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/tril_triu_grad_kernel.h"
+#include "paddle/phi/kernels/tril_grad_kernel.h"
 
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -20,11 +20,11 @@
 namespace phi {
 
 template <typename T, typename Context>
-void TrilTriuGradKernel(const Context& ctx,
-                        const DenseTensor& out_grad,
-                        int diagonal,
-                        bool lower,
-                        DenseTensor* x_grad) {
+void TrilGradKernel(const Context& ctx,
+                    const DenseTensor& out_grad,
+                    int diagonal,
+                    bool lower,
+                    DenseTensor* x_grad) {
   using XPUType = typename XPUTypeTrait<T>::Type;
   ctx.template Alloc<T>(x_grad);
   auto dy_shape = vectorize<int>(out_grad.dims());
@@ -49,4 +49,4 @@ void TrilTriuGradKernel(const Context& ctx,
 }  // namespace phi
 
 PD_REGISTER_KERNEL(
-    tril_triu_grad, XPU, ALL_LAYOUT, phi::TrilTriuGradKernel, int, float) {}
+    tril_grad, XPU, ALL_LAYOUT, phi::TrilGradKernel, int, float) {}
diff --git a/paddle/phi/kernels/xpu/tril_triu_kernel.cc b/paddle/phi/kernels/xpu/tril_kernel.cc
similarity index 82%
rename from paddle/phi/kernels/xpu/tril_triu_kernel.cc
rename to paddle/phi/kernels/xpu/tril_kernel.cc
index 3d9ae98a23..4b4cf579c2 100644
--- a/paddle/phi/kernels/xpu/tril_triu_kernel.cc
+++ b/paddle/phi/kernels/xpu/tril_kernel.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/tril_triu_kernel.h"
+#include "paddle/phi/kernels/tril_kernel.h"
 
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -20,11 +20,11 @@
 namespace phi {
 
 template <typename T, typename Context>
-void TrilTriuKernel(const Context& ctx,
-                    const DenseTensor& x,
-                    int diagonal,
-                    bool lower,
-                    DenseTensor* out) {
+void TrilKernel(const Context& ctx,
+                const DenseTensor& x,
+                int diagonal,
+                bool lower,
+                DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
   ctx.template Alloc<T>(out);
   auto xshape = vectorize<int>(x.dims());
@@ -48,5 +48,4 @@ void TrilTriuKernel(const Context& ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(
-    tril_triu, XPU, ALL_LAYOUT, phi::TrilTriuKernel, int, float) {}
+PD_REGISTER_KERNEL(tril, XPU, ALL_LAYOUT, phi::TrilKernel, int, float) {}
diff --git a/paddle/phi/kernels/xpu/uniform_random_kernel.cc b/paddle/phi/kernels/xpu/uniform_kernel.cc
similarity index 77%
rename from paddle/phi/kernels/xpu/uniform_random_kernel.cc
rename to paddle/phi/kernels/xpu/uniform_kernel.cc
index 48384164e7..f4732939b6 100644
--- a/paddle/phi/kernels/xpu/uniform_random_kernel.cc
+++ b/paddle/phi/kernels/xpu/uniform_kernel.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/phi/kernels/uniform_random_kernel.h"
+#include "paddle/phi/kernels/uniform_kernel.h"
 
 #include <string>
 
@@ -24,16 +24,16 @@ limitations under the License. */
 namespace phi {
 
 template <typename T, typename Context>
-void UniformRandomRawKernel(const Context &dev_ctx,
-                            const IntArray &shape,
-                            DataType dtype,
-                            const Scalar &min,
-                            const Scalar &max,
-                            int seed,
-                            int diag_num,
-                            int diag_step,
-                            float diag_val,
-                            DenseTensor *out) {
+void UniformRawKernel(const Context &dev_ctx,
+                      const IntArray &shape,
+                      DataType dtype,
+                      const Scalar &min,
+                      const Scalar &max,
+                      int seed,
+                      int diag_num,
+                      int diag_step,
+                      float diag_val,
+                      DenseTensor *out) {
   out->Resize(phi::make_ddim(shape.GetData()));
   T *data = dev_ctx.template Alloc<T>(out);
   int64_t size = out->numel();
@@ -76,5 +76,5 @@ void UniformRandomRawKernel(const Context &dev_ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(
-    uniform_random_raw, XPU, ALL_LAYOUT, phi::UniformRandomRawKernel, float) {}
+PD_REGISTER_KERNEL(uniform_raw, XPU, ALL_LAYOUT, phi::UniformRawKernel, float) {
+}
diff --git a/paddle/phi/kernels/yolo_loss_grad_kernel.h b/paddle/phi/kernels/yolo_loss_grad_kernel.h
new file mode 100644
index 0000000000..905c1ab4ef
--- /dev/null
+++ b/paddle/phi/kernels/yolo_loss_grad_kernel.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void YoloLossGradKernel(const Context& dev_ctx,
+                        const DenseTensor& x,
+                        const DenseTensor& gt_box,
+                        const DenseTensor& gt_label,
+                        const paddle::optional<DenseTensor>& gt_score,
+                        const DenseTensor& objectness_mask,
+                        const DenseTensor& gt_match_mask,
+                        const DenseTensor& loss_grad,
+                        const std::vector<int>& anchors,
+                        const std::vector<int>& anchor_mask,
+                        int class_num,
+                        float ignore_thresh,
+                        int downsample_ratio,
+                        bool use_label_smooth,
+                        float scale_x_Y,
+                        DenseTensor* x_grad,
+                        DenseTensor* gt_box_grad,
+                        DenseTensor* gt_label_grad,
+                        DenseTensor* gt_score_grad);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/yolo_loss_kernel.h b/paddle/phi/kernels/yolo_loss_kernel.h
new file mode 100644
index 0000000000..17735fe756
--- /dev/null
+++ b/paddle/phi/kernels/yolo_loss_kernel.h
@@ -0,0 +1,38 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void YoloLossKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& gt_box,
+                    const DenseTensor& gt_label,
+                    const paddle::optional<DenseTensor>& gt_score,
+                    const std::vector<int>& anchors,
+                    const std::vector<int>& anchor_mask,
+                    int class_num,
+                    float ignore_thresh,
+                    int downsample_ratio,
+                    bool use_label_smooth,
+                    float scale_x_Y,
+                    DenseTensor* loss,
+                    DenseTensor* objectness_mask,
+                    DenseTensor* gt_match_mask);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/yolov3_loss_grad_kernel.h b/paddle/phi/kernels/yolov3_loss_grad_kernel.h
deleted file mode 100644
index 4d0be5bebb..0000000000
--- a/paddle/phi/kernels/yolov3_loss_grad_kernel.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "paddle/phi/core/dense_tensor.h"
-
-namespace phi {
-
-template <typename T, typename Context>
-void Yolov3LossGradKernel(const Context& dev_ctx,
-                          const DenseTensor& x,
-                          const DenseTensor& gt_box,
-                          const DenseTensor& gt_label,
-                          const paddle::optional<DenseTensor>& gt_score,
-                          const DenseTensor& objectness_mask,
-                          const DenseTensor& gt_match_mask,
-                          const DenseTensor& loss_grad,
-                          const std::vector<int>& anchors,
-                          const std::vector<int>& anchor_mask,
-                          int class_num,
-                          float ignore_thresh,
-                          int downsample_ratio,
-                          bool use_label_smooth,
-                          float scale_x_Y,
-                          DenseTensor* x_grad,
-                          DenseTensor* gt_box_grad,
-                          DenseTensor* gt_label_grad,
-                          DenseTensor* gt_score_grad);
-
-}  // namespace phi
diff --git a/paddle/phi/kernels/yolov3_loss_kernel.h b/paddle/phi/kernels/yolov3_loss_kernel.h
deleted file mode 100644
index 3dabe5ce82..0000000000
--- a/paddle/phi/kernels/yolov3_loss_kernel.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "paddle/phi/core/dense_tensor.h"
-
-namespace phi {
-
-template <typename T, typename Context>
-void Yolov3LossKernel(const Context& dev_ctx,
-                      const DenseTensor& x,
-                      const DenseTensor& gt_box,
-                      const DenseTensor& gt_label,
-                      const paddle::optional<DenseTensor>& gt_score,
-                      const std::vector<int>& anchors,
-                      const std::vector<int>& anchor_mask,
-                      int class_num,
-                      float ignore_thresh,
-                      int downsample_ratio,
-                      bool use_label_smooth,
-                      float scale_x_Y,
-                      DenseTensor* loss,
-                      DenseTensor* objectness_mask,
-                      DenseTensor* gt_match_mask);
-
-}  // namespace phi
diff --git a/paddle/phi/ops/compat/hierarchical_sigmoid_sig.cc b/paddle/phi/ops/compat/hierarchical_sigmoid_sig.cc
index 5393439901..c4e04e5d40 100644
--- a/paddle/phi/ops/compat/hierarchical_sigmoid_sig.cc
+++ b/paddle/phi/ops/compat/hierarchical_sigmoid_sig.cc
@@ -18,7 +18,7 @@ namespace phi {
 
 KernelSignature HierarchicalSigmoidOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
-  return KernelSignature("hierarchical_sigmoid",
+  return KernelSignature("hsigmoid_loss",
                          {"X", "W", "Label", "PathTable", "PathCode", "Bias"},
                          {"num_classes",
                           "remote_prefetch",
@@ -33,7 +33,7 @@ KernelSignature HierarchicalSigmoidOpArgumentMapping(
 KernelSignature HierarchicalSigmoidGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   if (ctx.IsDenseTensorOutput("W@GRAD")) {
-    return KernelSignature("hierarchical_sigmoid_grad",
+    return KernelSignature("hsigmoid_loss_grad",
                            {"X",
                             "W",
                             "Label",
@@ -51,7 +51,7 @@ KernelSignature HierarchicalSigmoidGradOpArgumentMapping(
                             "is_sparse"},
                            {"X@GRAD", "W@GRAD", "Bias@GRAD"});
   } else if (ctx.IsSelectedRowsOutput("W@GRAD")) {
-    return KernelSignature("hierarchical_sigmoid_grad_sr",
+    return KernelSignature("hsigmoid_loss_grad_sr",
                            {"X",
                             "W",
                             "Label",
@@ -75,6 +75,9 @@ KernelSignature HierarchicalSigmoidGradOpArgumentMapping(
 
 }  // namespace phi
 
+PD_REGISTER_BASE_KERNEL_NAME(hierarchical_sigmoid, hsigmoid_loss);
+PD_REGISTER_BASE_KERNEL_NAME(hierarchical_sigmoid_grad, hsigmoid_loss_grad);
+
 PD_REGISTER_ARG_MAPPING_FN(hierarchical_sigmoid,
                            phi::HierarchicalSigmoidOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(hierarchical_sigmoid_grad,
diff --git a/paddle/phi/ops/compat/tril_triu_sig.cc b/paddle/phi/ops/compat/tril_triu_sig.cc
index 3c5fa15b41..3cf022c60e 100644
--- a/paddle/phi/ops/compat/tril_triu_sig.cc
+++ b/paddle/phi/ops/compat/tril_triu_sig.cc
@@ -17,16 +17,19 @@ limitations under the License. */
 namespace phi {
 
 KernelSignature TrilTriuOpArgumentMapping(const ArgumentMappingContext& ctx) {
-  return KernelSignature("tril_triu", {"X"}, {"diagonal", "lower"}, {"Out"});
+  return KernelSignature("tril", {"X"}, {"diagonal", "lower"}, {"Out"});
 }
 
 KernelSignature TrilTriuGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature(
-      "tril_triu_grad", {"Out@GRAD"}, {"diagonal", "lower"}, {"X@GRAD"});
+      "tril_grad", {"Out@GRAD"}, {"diagonal", "lower"}, {"X@GRAD"});
 }
 
 }  // namespace phi
 
+PD_REGISTER_BASE_KERNEL_NAME(tril_triu, tril);
+PD_REGISTER_BASE_KERNEL_NAME(tril_triu_grad, tril_grad);
+
 PD_REGISTER_ARG_MAPPING_FN(tril_triu, phi::TrilTriuOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(tril_triu_grad, phi::TrilTriuGradOpArgumentMapping);
diff --git a/paddle/phi/ops/compat/uniform_random_inplace_sig.cc b/paddle/phi/ops/compat/uniform_random_inplace_sig.cc
index afdc0d5f3b..ae955e9ca1 100644
--- a/paddle/phi/ops/compat/uniform_random_inplace_sig.cc
+++ b/paddle/phi/ops/compat/uniform_random_inplace_sig.cc
@@ -18,7 +18,7 @@ namespace phi {
 KernelSignature UniformRandomInplaceOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature(
-      "uniform_random_inplace",
+      "uniform_inplace",
       {"X"},
       {"min", "max", "seed", "diag_num", "diag_step", "diag_val"},
       {"Out"});
@@ -27,7 +27,7 @@ KernelSignature UniformRandomInplaceOpArgumentMapping(
 KernelSignature UniformRandomInplaceGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature(
-      "uniform_random_inplace_grad",
+      "uniform_inplace_grad",
       {"Out@GRAD"},
       {"min", "max", "seed", "diag_num", "diag_step", "diag_val"},
       {"X@GRAD"});
@@ -35,6 +35,8 @@ KernelSignature UniformRandomInplaceGradOpArgumentMapping(
 
 }  // namespace phi
 
+PD_REGISTER_BASE_KERNEL_NAME(uniform_random_inplace, uniform_inplace);
+
 PD_REGISTER_ARG_MAPPING_FN(uniform_random_inplace,
                            phi::UniformRandomInplaceOpArgumentMapping);
 
diff --git a/paddle/phi/ops/compat/uniform_random_sig.cc b/paddle/phi/ops/compat/uniform_random_sig.cc
index d06d4026f4..a6d0b185c6 100644
--- a/paddle/phi/ops/compat/uniform_random_sig.cc
+++ b/paddle/phi/ops/compat/uniform_random_sig.cc
@@ -22,7 +22,7 @@ KernelSignature UniformRandomOpArgumentMapping(
   if (ctx.IsDenseTensorOutput("Out")) {
     if (diag_num) {
       if (ctx.InputSize("ShapeTensorList") > 0) {
-        return KernelSignature("uniform_random_raw",
+        return KernelSignature("uniform_raw",
                                {},
                                {"ShapeTensorList",
                                 "dtype",
@@ -37,7 +37,7 @@ KernelSignature UniformRandomOpArgumentMapping(
         const auto& shape =
             paddle::any_cast<std::vector<int64_t>>(ctx.Attr("shape"));
         if (ctx.HasInput("ShapeTensor") && shape.empty()) {
-          return KernelSignature("uniform_random_raw",
+          return KernelSignature("uniform_raw",
                                  {},
                                  {"ShapeTensor",
                                   "dtype",
@@ -49,7 +49,7 @@ KernelSignature UniformRandomOpArgumentMapping(
                                   "diag_val"},
                                  {"Out"});
         } else {
-          return KernelSignature("uniform_random_raw",
+          return KernelSignature("uniform_raw",
                                  {},
                                  {"shape",
                                   "dtype",
@@ -65,7 +65,7 @@ KernelSignature UniformRandomOpArgumentMapping(
     } else {
       if (ctx.InputSize("ShapeTensorList") > 0) {
         return KernelSignature(
-            "uniform_random",
+            "uniform",
             {},
             {"ShapeTensorList", "dtype", "min", "max", "seed"},
             {"Out"});
@@ -73,22 +73,20 @@ KernelSignature UniformRandomOpArgumentMapping(
         const auto& shape =
             paddle::any_cast<std::vector<int64_t>>(ctx.Attr("shape"));
         if (ctx.HasInput("ShapeTensor") && shape.empty()) {
-          return KernelSignature("uniform_random",
+          return KernelSignature("uniform",
                                  {},
                                  {"ShapeTensor", "dtype", "min", "max", "seed"},
                                  {"Out"});
         } else {
-          return KernelSignature("uniform_random",
-                                 {},
-                                 {"shape", "dtype", "min", "max", "seed"},
-                                 {"Out"});
+          return KernelSignature(
+              "uniform", {}, {"shape", "dtype", "min", "max", "seed"}, {"Out"});
         }
       }
     }
   } else if (ctx.IsSelectedRowsOutput("Out")) {
     if (diag_num) {
       if (ctx.InputSize("ShapeTensorList") > 0) {
-        return KernelSignature("uniform_random_raw_sr",
+        return KernelSignature("uniform_raw_sr",
                                {},
                                {"ShapeTensorList",
                                 "dtype",
@@ -103,7 +101,7 @@ KernelSignature UniformRandomOpArgumentMapping(
         const auto& shape =
             paddle::any_cast<std::vector<int64_t>>(ctx.Attr("shape"));
         if (ctx.HasInput("ShapeTensor") && shape.empty()) {
-          return KernelSignature("uniform_random_raw_sr",
+          return KernelSignature("uniform_raw_sr",
                                  {},
                                  {"ShapeTensor",
                                   "dtype",
@@ -115,7 +113,7 @@ KernelSignature UniformRandomOpArgumentMapping(
                                   "diag_val"},
                                  {"Out"});
         } else {
-          return KernelSignature("uniform_random_raw_sr",
+          return KernelSignature("uniform_raw_sr",
                                  {},
                                  {"shape",
                                   "dtype",
@@ -131,7 +129,7 @@ KernelSignature UniformRandomOpArgumentMapping(
     } else {
       if (ctx.InputSize("ShapeTensorList") > 0) {
         return KernelSignature(
-            "uniform_random_sr",
+            "uniform_sr",
             {},
             {"ShapeTensorList", "dtype", "min", "max", "seed"},
             {"Out"});
@@ -139,12 +137,12 @@ KernelSignature UniformRandomOpArgumentMapping(
         const auto& shape =
             paddle::any_cast<std::vector<int64_t>>(ctx.Attr("shape"));
         if (ctx.HasInput("ShapeTensor") && shape.empty()) {
-          return KernelSignature("uniform_random_sr",
+          return KernelSignature("uniform_sr",
                                  {},
                                  {"ShapeTensor", "dtype", "min", "max", "seed"},
                                  {"Out"});
         } else {
-          return KernelSignature("uniform_random_sr",
+          return KernelSignature("uniform_sr",
                                  {},
                                  {"shape", "dtype", "min", "max", "seed"},
                                  {"Out"});
@@ -156,4 +154,6 @@ KernelSignature UniformRandomOpArgumentMapping(
 }
 }  // namespace phi
 
+PD_REGISTER_BASE_KERNEL_NAME(uniform_random, uniform);
+
 PD_REGISTER_ARG_MAPPING_FN(uniform_random, phi::UniformRandomOpArgumentMapping);
diff --git a/paddle/phi/ops/compat/where_index_sig.cc b/paddle/phi/ops/compat/where_index_sig.cc
new file mode 100644
index 0000000000..cfe2a8110c
--- /dev/null
+++ b/paddle/phi/ops/compat/where_index_sig.cc
@@ -0,0 +1,27 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/core/compat/op_utils.h"
+
+namespace phi {
+
+KernelSignature WhereIndexOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature("nonzero", {"Condition"}, {}, {"Out"});
+}
+
+}  // namespace phi
+
+PD_REGISTER_BASE_KERNEL_NAME(where_index, nonzero);
+
+PD_REGISTER_ARG_MAPPING_FN(where_index, phi::WhereIndexOpArgumentMapping);
diff --git a/paddle/phi/ops/compat/yolov3_loss_sig.cc b/paddle/phi/ops/compat/yolov3_loss_sig.cc
index cdd3ace81a..f98709a9fd 100644
--- a/paddle/phi/ops/compat/yolov3_loss_sig.cc
+++ b/paddle/phi/ops/compat/yolov3_loss_sig.cc
@@ -17,7 +17,7 @@
 namespace phi {
 
 KernelSignature Yolov3LossOpArgumentMapping(const ArgumentMappingContext& ctx) {
-  return KernelSignature("yolov3_loss",
+  return KernelSignature("yolo_loss",
                          {"X", "GTBox", "GTLabel", "GTScore"},
                          {"anchors",
                           "anchor_mask",
@@ -32,7 +32,7 @@ KernelSignature Yolov3LossOpArgumentMapping(const ArgumentMappingContext& ctx) {
 KernelSignature Yolov3LossGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature(
-      "yolov3_loss_grad",
+      "yolo_loss_grad",
       {"X",
        "GTBox",
        "GTLabel",
@@ -51,6 +51,9 @@ KernelSignature Yolov3LossGradOpArgumentMapping(
 }
 }  // namespace phi
 
+PD_REGISTER_BASE_KERNEL_NAME(yolov3_loss, yolo_loss);
+PD_REGISTER_BASE_KERNEL_NAME(yolov3_loss_grad, yolo_loss_grad);
+
 PD_REGISTER_ARG_MAPPING_FN(yolov3_loss, phi::Yolov3LossOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(yolov3_loss_grad,
                            phi::Yolov3LossGradOpArgumentMapping);
diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index 5cf54f2213..052564e787 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -309,7 +309,7 @@ class UniformInitializer(Initializer):
 
         if framework._non_static_mode():
             if in_dygraph_mode():
-                out_var = _C_ops.uniform_random(
+                out_var = _C_ops.uniform(
                     var.shape,
                     out_dtype,
                     self._low,
@@ -711,7 +711,7 @@ class XavierInitializer(Initializer):
             if self._uniform:
                 limit = math.sqrt(6.0 / float(fan_in + fan_out))
                 if in_dygraph_mode():
-                    out_var = _C_ops.uniform_random(
+                    out_var = _C_ops.uniform(
                         out_var.shape,
                         out_dtype,
                         -limit,
@@ -923,7 +923,7 @@ class MSRAInitializer(Initializer):
                 gain = calculate_gain(self._nonlinearity, self._negative_slope)
                 limit = gain * math.sqrt(3.0 / float(fan_in))
                 if in_dygraph_mode():
-                    out_var = _C_ops.uniform_random(
+                    out_var = _C_ops.uniform(
                         var.shape,
                         out_dtype,
                         -limit,
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 69c00b5dbb..b1a49e23cd 100755
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -5385,7 +5385,7 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None):
                 )
             )
     if in_dygraph_mode():
-        return _C_ops.reduce_prod(
+        return _C_ops.prod(
             input,
             dim if dim != None and dim != [] else [0],
             keep_dim,
@@ -15548,7 +15548,7 @@ def where(condition):
     """
 
     if in_dygraph_mode():
-        return _C_ops.where_index(condition)
+        return _C_ops.nonzero(condition)
     if _in_legacy_dygraph():
         return _legacy_C_ops.where_index(condition)
 
@@ -16567,7 +16567,7 @@ def uniform_random(
 
     if in_dygraph_mode():
         shape = utils.convert_shape_to_list(shape)
-        return _C_ops.uniform_random(
+        return _C_ops.uniform(
             shape,
             dtype,
             float(min),
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index b7e1045b6e..8a1c5b9f0b 100755
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -1017,7 +1017,7 @@ def hsigmoid_loss(
             #  [1.92374969]]
     """
     if in_dygraph_mode():
-        out, _, _ = _C_ops.hierarchical_sigmoid(
+        out, _, _ = _C_ops.hsigmoid_loss(
             input,
             weight,
             label,
diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py
index 8f44f3ffe1..32f63c6f23 100644
--- a/python/paddle/tensor/creation.py
+++ b/python/paddle/tensor/creation.py
@@ -1097,7 +1097,7 @@ def tril(x, diagonal=0, name=None):
             #         [9 , 10, 0 , 0 ]])
     """
     if in_dygraph_mode():
-        return _C_ops.tril_triu(x, diagonal, True)
+        return _C_ops.tril(x, diagonal, True)
 
     if _in_legacy_dygraph():
         op = getattr(_legacy_C_ops, 'tril_triu')
@@ -1163,7 +1163,7 @@ def triu(x, diagonal=0, name=None):
 
     """
     if in_dygraph_mode():
-        return _C_ops.tril_triu(x, diagonal, False)
+        return _C_ops.tril(x, diagonal, False)
 
     if _in_legacy_dygraph():
         op = getattr(_legacy_C_ops, 'tril_triu')
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index f367215cd1..f34851fdcc 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -3706,7 +3706,7 @@ def prod(x, axis=None, keepdim=False, dtype=None, name=None):
             dim = [0]
 
     if in_dygraph_mode():
-        return _C_ops.reduce_prod(x, dim, keepdim, reduce_all)
+        return _C_ops.prod(x, dim, keepdim, reduce_all)
     if _in_legacy_dygraph():
         return _legacy_C_ops.reduce_prod(
             x, 'dim', dim, 'keep_dim', keepdim, 'reduce_all', reduce_all
diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py
index f5f448cf4e..d49941e199 100644
--- a/python/paddle/tensor/random.py
+++ b/python/paddle/tensor/random.py
@@ -584,7 +584,7 @@ def uniform(shape, dtype=None, min=-1.0, max=1.0, seed=0, name=None):
 
     if in_dygraph_mode():
         shape = utils.convert_shape_to_list(shape)
-        return _C_ops.uniform_random(
+        return _C_ops.uniform(
             shape,
             dtype,
             float(min),
@@ -664,7 +664,7 @@ def uniform_(x, min=-1.0, max=1.0, seed=0, name=None):
             #  [ 0.433519,    0.39483607, -0.8660099,   0.83664286]] # random
     """
     if in_dygraph_mode():
-        return _C_ops.uniform_random_inplace_(x, min, max, seed, 0, 0, 1.0)
+        return _C_ops.uniform_inplace_(x, min, max, seed, 0, 0, 1.0)
     else:
         return _legacy_C_ops.uniform_random_inplace_(
             x, 'min', min, 'max', max, 'seed', seed
diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py
index 165809b39f..355bc63f03 100644
--- a/python/paddle/tensor/search.py
+++ b/python/paddle/tensor/search.py
@@ -429,7 +429,7 @@ def nonzero(x, as_tuple=False):
     rank = len(shape)
 
     if in_dygraph_mode():
-        outs = _C_ops.where_index(x)
+        outs = _C_ops.nonzero(x)
     elif paddle.in_dynamic_mode():
         outs = _legacy_C_ops.where_index(x)
     else:
diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py
index 519ac1db4c..1e8fc049ef 100755
--- a/python/paddle/vision/ops.py
+++ b/python/paddle/vision/ops.py
@@ -197,7 +197,7 @@ def yolo_loss(
     """
 
     if in_dygraph_mode():
-        loss, _, _ = _C_ops.yolov3_loss(
+        loss, _, _ = _C_ops.yolo_loss(
             x,
             gt_box,
             gt_label,
-- 
GitLab