diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc
index 83824c810840189502105ca3dc95dbd42713d4c3..695c28d77554ac9235e69e5721ef5cfe9389de95 100644
--- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc
+++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc
@@ -29,7 +29,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class CSoftmaxWithCrossEntropyOp : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -468,7 +468,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::XPUContext, T> {
   }
 };
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class CSoftmaxWithCrossEntropyGrad : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -535,9 +535,13 @@ class CSoftmaxWithCrossEntropyGrad : public framework::OpKernel<T> {
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
 
-REGISTER_OP_XPU_KERNEL(c_softmax_with_cross_entropy,
-                       ops::CSoftmaxWithCrossEntropyOp<phi::XPUContext, float>);
-
-REGISTER_OP_XPU_KERNEL(
-    c_softmax_with_cross_entropy_grad,
-    ops::CSoftmaxWithCrossEntropyGrad<phi::XPUContext, float>);
+PD_REGISTER_STRUCT_KERNEL(c_softmax_with_cross_entropy,
+                          XPU,
+                          ALL_LAYOUT,
+                          ops::CSoftmaxWithCrossEntropyOp,
+                          float) {}
+PD_REGISTER_STRUCT_KERNEL(c_softmax_with_cross_entropy_grad,
+                          XPU,
+                          ALL_LAYOUT,
+                          ops::CSoftmaxWithCrossEntropyGrad,
+                          float) {}
diff --git a/paddle/fluid/operators/collective/c_split_op_xpu.cc b/paddle/fluid/operators/collective/c_split_op_xpu.cc
index bad920a11ff5ee4806a4e2094ec894cde27160fe..d573a83d708c4f79a3263bba714de4090f2574dc 100644
--- a/paddle/fluid/operators/collective/c_split_op_xpu.cc
+++ b/paddle/fluid/operators/collective/c_split_op_xpu.cc
@@ -22,7 +22,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class CSplitOpXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -87,7 +87,10 @@ class CSplitOpXPUKernel : public framework::OpKernel<T> {
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
 
-REGISTER_OP_XPU_KERNEL(c_split,
-                       ops::CSplitOpXPUKernel<float>,
-                       ops::CSplitOpXPUKernel<int>,
-                       ops::CSplitOpXPUKernel<plat::float16>);
+PD_REGISTER_STRUCT_KERNEL(c_split,
+                          XPU,
+                          ALL_LAYOUT,
+                          ops::CSplitOpXPUKernel,
+                          float,
+                          int,
+                          plat::float16) {}
diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc
index 4917057059ffcc1a5186577ad3e6b8064e130bdd..0b432cab281fcf07423e60c264668b1719dfa72b 100644
--- a/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc
+++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc
@@ -17,5 +17,5 @@ limitations under the License. */
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
 
-REGISTER_OP_XPU_KERNEL(c_sync_calc_stream,
-                       ops::CSyncCalcStreamKernel<float, plat::XPUPlace>)
+PD_REGISTER_STRUCT_KERNEL(
+    c_sync_calc_stream, XPU, ALL_LAYOUT, ops::CSyncCalcStreamKernel, float) {}
diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op_xpu.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op_xpu.cc
index bbb3b62bf3cf6a6c3daa9efbe3a2eadf2c5af234..ce2c20d57f0b3f105b1eabb599af95cc5eba56b4 100644
--- a/paddle/fluid/operators/collective/c_sync_comm_stream_op_xpu.cc
+++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op_xpu.cc
@@ -17,5 +17,5 @@ limitations under the License. */
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
 
-REGISTER_OP_XPU_KERNEL(c_sync_comm_stream,
-                       ops::CSyncCommStreamKernel<float, plat::XPUPlace>);
+PD_REGISTER_STRUCT_KERNEL(
+    c_sync_comm_stream, XPU, ALL_LAYOUT, ops::CSyncCommStreamKernel, float) {}
diff --git a/paddle/fluid/operators/controlflow/logical_op_xpu.h b/paddle/fluid/operators/controlflow/logical_op_xpu.h
index 8afefd6837449375aa60d6e1be10e9e33c9739f2..614db61558f793185244fe066945fe61e9c9662d 100644
--- a/paddle/fluid/operators/controlflow/logical_op_xpu.h
+++ b/paddle/fluid/operators/controlflow/logical_op_xpu.h
@@ -156,7 +156,17 @@ class BinaryLogicalOpXPUKernel : public framework::OpKernel<T> {
   }
 };
 
-template <typename T>
+// Generates BinaryLogical<op_name>CPUKernel<T, DeviceContext>: an empty class
+// deriving from BinaryLogicalOpXPUKernel<xpu_type, T>, giving the kernel the
+// <T, DeviceContext> signature used with PD_REGISTER_STRUCT_KERNEL.
+// NOTE: the "CPU" in the generated name is kept only because the logical_and
+// and logical_or XPU registrations refer to it; the kernel itself is XPU.
+#define DEFINE_BINARY_LOGICAL_OP_XPU_KERNEL(op_name, xpu_type) \
+  template <typename T, typename DeviceContext>                \
+  class BinaryLogical##op_name##CPUKernel                      \
+      : public BinaryLogicalOpXPUKernel<xpu_type, T> {};
+
+template <typename T, typename DeviceContext>
 class UnaryLogicalOpXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
diff --git a/paddle/fluid/operators/controlflow/logicaland_op_xpu.cc b/paddle/fluid/operators/controlflow/logicaland_op_xpu.cc
index 6248b6e0b063781eea00cb0f578a913914ee80d8..563317f209ebc03c43cfbd15cbebb1cdfcaeae08 100644
--- a/paddle/fluid/operators/controlflow/logicaland_op_xpu.cc
+++ b/paddle/fluid/operators/controlflow/logicaland_op_xpu.cc
@@ -14,14 +14,23 @@ limitations under the License. */
 
 #ifdef PADDLE_WITH_XPU
 #include "paddle/fluid/operators/controlflow/logical_op_xpu.h"
+
+namespace paddle {
+namespace operators {
+DEFINE_BINARY_LOGICAL_OP_XPU_KERNEL(AND, XpuLogicalType::XPU_AND);
+}  // namespace operators
+}  // namespace paddle
+
 namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    logical_and,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_AND, bool>,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_AND, int8_t>,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_AND, int16_t>,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_AND, int>,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_AND, int64_t>,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_AND, float>,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_AND, double>);
+PD_REGISTER_STRUCT_KERNEL(logical_and,
+                          XPU,
+                          ALL_LAYOUT,
+                          ops::BinaryLogicalANDCPUKernel,
+                          bool,
+                          int8_t,
+                          int16_t,
+                          int,
+                          int64_t,
+                          float,
+                          double) {}
 #endif
diff --git a/paddle/fluid/operators/controlflow/logicalnot_op_xpu.cc b/paddle/fluid/operators/controlflow/logicalnot_op_xpu.cc
index b8336c7201c3b527b0ff6cb74476073ba3aa86b8..1431816810b1e8c18f98b360d1ab26a2069b9194 100644
--- a/paddle/fluid/operators/controlflow/logicalnot_op_xpu.cc
+++ b/paddle/fluid/operators/controlflow/logicalnot_op_xpu.cc
@@ -15,12 +15,15 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
 #include "paddle/fluid/operators/controlflow/logical_op_xpu.h"
 namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(logicalnot,
-                       ops::UnaryLogicalOpXPUKernel<bool>,
-                       ops::UnaryLogicalOpXPUKernel<int8_t>,
-                       ops::UnaryLogicalOpXPUKernel<int16_t>,
-                       ops::UnaryLogicalOpXPUKernel<int>,
-                       ops::UnaryLogicalOpXPUKernel<int64_t>,
-                       ops::UnaryLogicalOpXPUKernel<float>,
-                       ops::UnaryLogicalOpXPUKernel<double>);
+PD_REGISTER_STRUCT_KERNEL(logicalnot,
+                          XPU,
+                          ALL_LAYOUT,
+                          ops::UnaryLogicalOpXPUKernel,
+                          bool,
+                          int8_t,
+                          int16_t,
+                          int,
+                          int64_t,
+                          float,
+                          double) {}
 #endif
diff --git a/paddle/fluid/operators/controlflow/logicalor_op_xpu.cc b/paddle/fluid/operators/controlflow/logicalor_op_xpu.cc
index 126596841a29f8a1796e828dd9549db163de8512..03c707222e44e83c166507e9e0b1ca3d5b197940 100644
--- a/paddle/fluid/operators/controlflow/logicalor_op_xpu.cc
+++ b/paddle/fluid/operators/controlflow/logicalor_op_xpu.cc
@@ -15,14 +15,22 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
 #include "paddle/fluid/operators/controlflow/logical_op_xpu.h"
 
+namespace paddle {
+namespace operators {
+DEFINE_BINARY_LOGICAL_OP_XPU_KERNEL(OR, XpuLogicalType::XPU_OR);
+}  // namespace operators
+}  // namespace paddle
+
 namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    logical_or,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_OR, bool>,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_OR, int8_t>,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_OR, int16_t>,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_OR, int>,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_OR, int64_t>,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_OR, float>,
-    ops::BinaryLogicalOpXPUKernel<ops::XpuLogicalType::XPU_OR, double>);
+PD_REGISTER_STRUCT_KERNEL(logical_or,
+                          XPU,
+                          ALL_LAYOUT,
+                          ops::BinaryLogicalORCPUKernel,
+                          bool,
+                          int8_t,
+                          int16_t,
+                          int,
+                          int64_t,
+                          float,
+                          double) {}
 #endif
diff --git a/paddle/fluid/operators/detection/iou_similarity_op_xpu.cc b/paddle/fluid/operators/detection/iou_similarity_op_xpu.cc
index 1dc669075b17af766a99e14ba70928e94fdf087c..27ffa64c2a89214b335b34a5985c75b8bb6652e5 100644
--- a/paddle/fluid/operators/detection/iou_similarity_op_xpu.cc
+++ b/paddle/fluid/operators/detection/iou_similarity_op_xpu.cc
@@ -19,7 +19,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class XPUIOUSimilarityKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -57,6 +57,7 @@ class XPUIOUSimilarityKernel : public framework::OpKernel<T> {
 namespace ops = paddle::operators;
 using XPU = paddle::platform::XPUDeviceContext;
 
-REGISTER_OP_XPU_KERNEL(iou_similarity, ops::XPUIOUSimilarityKernel<XPU, float>);
+PD_REGISTER_STRUCT_KERNEL(
+    iou_similarity, XPU, ALL_LAYOUT, ops::XPUIOUSimilarityKernel, float) {}
 
 #endif
diff --git a/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc b/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc
index 82b437b943cb43f7ef682355561e425642bd9ec0..6594df2f5164f4a9bc70a39a82a0c113942247e7 100644
--- a/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc
+++ b/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc
@@ -22,7 +22,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class FusedGemmEpilogueXPUKernel : public framework::OpKernel<T> {
   using XPUType = typename XPUTypeTrait<T>::Type;
 
@@ -102,7 +102,7 @@ class FusedGemmEpilogueXPUKernel : public framework::OpKernel<T> {
   }
 };
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class FusedGemmEpilogueXPUGradKernel : public framework::OpKernel<T> {
   using XPUType = typename XPUTypeTrait<T>::Type;
 
@@ -227,15 +227,17 @@ class FusedGemmEpilogueXPUGradKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-
-REGISTER_OP_XPU_KERNEL(
-    fused_gemm_epilogue,
-    ops::FusedGemmEpilogueXPUKernel<phi::XPUContext, float>,
-    ops::FusedGemmEpilogueXPUKernel<phi::XPUContext,
-                                    paddle::platform::float16>);
-
-REGISTER_OP_XPU_KERNEL(
-    fused_gemm_epilogue_grad,
-    ops::FusedGemmEpilogueXPUGradKernel<phi::XPUContext, float>,
-    ops::FusedGemmEpilogueXPUGradKernel<phi::XPUContext,
-                                        paddle::platform::float16>);
+namespace plat = paddle::platform;
+
+PD_REGISTER_STRUCT_KERNEL(fused_gemm_epilogue,
+                          XPU,
+                          ALL_LAYOUT,
+                          ops::FusedGemmEpilogueXPUKernel,
+                          float,
+                          plat::float16) {}
+PD_REGISTER_STRUCT_KERNEL(fused_gemm_epilogue_grad,
+                          XPU,
+                          ALL_LAYOUT,
+                          ops::FusedGemmEpilogueXPUGradKernel,
+                          float,
+                          plat::float16) {}
diff --git a/paddle/fluid/operators/load_combine_op_xpu.cc b/paddle/fluid/operators/load_combine_op_xpu.cc
index 307c9042c543d1a9b14418723fc780eb65f085d9..d285af37cda98f9ec032438c2418286037f8b265 100644
--- a/paddle/fluid/operators/load_combine_op_xpu.cc
+++ b/paddle/fluid/operators/load_combine_op_xpu.cc
@@ -15,11 +15,12 @@ limitations under the License. */
 #include "paddle/fluid/operators/load_combine_op.h"
 
 namespace ops = paddle::operators;
-using XPUCtx = paddle::platform::XPUDeviceContext;
-
-REGISTER_OP_XPU_KERNEL(load_combine,
-                       ops::LoadCombineOpKernel<float, XPUCtx>,
-                       ops::LoadCombineOpKernel<double, XPUCtx>,
-                       ops::LoadCombineOpKernel<int, XPUCtx>,
-                       ops::LoadCombineOpKernel<int8_t, XPUCtx>,
-                       ops::LoadCombineOpKernel<int64_t, XPUCtx>);
+PD_REGISTER_STRUCT_KERNEL(load_combine,
+                          XPU,
+                          ALL_LAYOUT,
+                          ops::LoadCombineOpKernel,
+                          float,
+                          double,
+                          int,
+                          int8_t,
+                          int64_t) {}
diff --git a/paddle/fluid/operators/lod_reset_op.cc b/paddle/fluid/operators/lod_reset_op.cc
index aa5c6bc249cda11c0004ca0a63faf7cd90c394f6..ae464e7b47161701367f316db0c5277102b47550 100644
--- a/paddle/fluid/operators/lod_reset_op.cc
+++ b/paddle/fluid/operators/lod_reset_op.cc
@@ -259,13 +259,15 @@ PD_REGISTER_STRUCT_KERNEL(lod_reset,
                           int64_t) {}
 
 #ifdef PADDLE_WITH_XPU
-using XPUCtx = paddle::platform::XPUDeviceContext;
-REGISTER_OP_XPU_KERNEL(lod_reset,
-                       ops::LoDResetKernel<paddle::platform::float16, XPUCtx>,
-                       ops::LoDResetKernel<float, XPUCtx>,
-                       ops::LoDResetKernel<double, XPUCtx>,
-                       ops::LoDResetKernel<int, XPUCtx>,
-                       ops::LoDResetKernel<int64_t, XPUCtx>);
+PD_REGISTER_STRUCT_KERNEL(lod_reset,
+                          XPU,
+                          ALL_LAYOUT,
+                          ops::LoDResetKernel,
+                          plat::float16,
+                          float,
+                          double,
+                          int,
+                          int64_t) {}
 #endif
 
 PD_REGISTER_STRUCT_KERNEL(lod_reset_grad,
diff --git a/paddle/fluid/operators/log_loss_op_xpu.cc b/paddle/fluid/operators/log_loss_op_xpu.cc
index 87e6d42e98ad555d6f7e87c9650dce5350e7016f..6c0c7f30d8e4912a216b086ef3962227ad906f94 100644
--- a/paddle/fluid/operators/log_loss_op_xpu.cc
+++ b/paddle/fluid/operators/log_loss_op_xpu.cc
@@ -17,7 +17,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename DeviceContext, typename T, typename AttrType = T>
+template <typename T, typename DeviceContext, typename AttrType = T>
 class LogLossXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -37,7 +37,7 @@ class LogLossXPUKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "log_loss");
   }
 };
-template <typename DeviceContext, typename T, typename AttrType = T>
+template <typename T, typename DeviceContext, typename AttrType = T>
 class LogLossGradXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -67,10 +67,9 @@ class LogLossGradXPUKernel : public framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    log_loss, ops::LogLossXPUKernel<paddle::platform::XPUDeviceContext, float>);
-REGISTER_OP_XPU_KERNEL(
-    log_loss_grad,
-    ops::LogLossGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
 
+PD_REGISTER_STRUCT_KERNEL(
+    log_loss, XPU, ALL_LAYOUT, ops::LogLossXPUKernel, float) {}
+PD_REGISTER_STRUCT_KERNEL(
+    log_loss_grad, XPU, ALL_LAYOUT, ops::LogLossGradXPUKernel, float) {}
 #endif
diff --git a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc
index 737228902b6e7b22ef107e8327c87b54f2aa708e..0ac30b3e8734718fb314acfea554bbe8a67f4fd6 100644
--- a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc
+++ b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc
@@ -21,7 +21,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class AccuracyXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -73,8 +73,6 @@ class AccuracyXPUKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    accuracy,
-    ops::AccuracyXPUKernel<paddle::platform::XPUDeviceContext, float>);
-
+PD_REGISTER_STRUCT_KERNEL(
+    accuracy, XPU, ALL_LAYOUT, ops::AccuracyXPUKernel, float) {}
 #endif
diff --git a/paddle/fluid/operators/optimizers/lars_momentum_op_xpu.cc b/paddle/fluid/operators/optimizers/lars_momentum_op_xpu.cc
index 267e064b3065ed229e80a9598aa17bf9f3de461b..52b57252b0abedbef82a5fc188820064d9712862 100644
--- a/paddle/fluid/operators/optimizers/lars_momentum_op_xpu.cc
+++ b/paddle/fluid/operators/optimizers/lars_momentum_op_xpu.cc
@@ -20,7 +20,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class LarsMomentumOpXPUKernel : public framework::OpKernel<T> {
   using XPUType = typename XPUTypeTrait<T>::Type;
 
@@ -115,7 +115,11 @@ class LarsMomentumOpXPUKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(lars_momentum,
-                       ops::LarsMomentumOpXPUKernel<paddle::platform::float16>,
-                       ops::LarsMomentumOpXPUKernel<float>);
+namespace plat = paddle::platform;
+PD_REGISTER_STRUCT_KERNEL(lars_momentum,
+                          XPU,
+                          ALL_LAYOUT,
+                          ops::LarsMomentumOpXPUKernel,
+                          float,
+                          plat::float16) {}
 #endif
diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc
index e250b5585da06a50603d32baec2dd30ba9f51aa9..b23fee1a012df725cb61416b18ce6c289bbbe652 100644
--- a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc
+++ b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc
@@ -70,6 +70,10 @@ class XPULogsumexpKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 namespace ops = paddle::operators;
+// This kernel cannot be registered in phi: the logsumexp op should run
+// phi::LogsumexpKernel rather than XPULogsumexpKernel. If the XPU logsumexp
+// kernel were registered in phi, the logsumexp op would dispatch to
+// XPULogsumexpKernel here and raise an error.
 REGISTER_OP_XPU_KERNEL(
     logsumexp,
     ops::XPULogsumexpKernel<paddle::platform::XPUDeviceContext, float>);