Unverified commit 1c12f2f3, authored by huangjiyi and committed by GitHub

Register fluid kernels to phi [part 10] (#53034)

* update

* update

* Revert "update"

* fix bug

* update
Parent 20a66bbf
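Every hunk below applies the same two-part pattern: kernel registrations move from the legacy fluid macros (REGISTER_OP_CPU_KERNEL / REGISTER_OP_CUDA_KERNEL) to phi's PD_REGISTER_STRUCT_KERNEL, and the kernel class templates swap their parameter order from <DeviceContext, T> to <T, DeviceContext>. Below is a minimal before/after sketch of the pattern, using a hypothetical my_op and ops::MyOpKernel rather than any operator from this PR; it builds only inside the Paddle source tree, and the claim that the macro instantiates the class as <dtype, DeviceContext> is inferred from the diff, not from the macro's definition.

// The kernel class: the device context moves to the second template
// parameter, matching how PD_REGISTER_STRUCT_KERNEL appears to
// instantiate it.
template <typename T, typename DeviceContext>  // was <DeviceContext, T>
class MyOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // ... the kernel body is unchanged by this migration ...
  }
};

// Before: one explicit instantiation per (DeviceContext, dtype) pair.
// REGISTER_OP_CPU_KERNEL(my_op,
//                        ops::MyOpKernel<phi::CPUContext, float>,
//                        ops::MyOpKernel<phi::CPUContext, double>);

// After: the backend (CPU/GPU) and layout become macro arguments, and the
// trailing dtype list replaces the explicit instantiations.
PD_REGISTER_STRUCT_KERNEL(
    my_op, CPU, ALL_LAYOUT, ops::MyOpKernel, float, double) {}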
@@ -214,6 +214,9 @@ void DataTranferHelper::RunAndConstructOpFuncNode(
*(op_with_kernel->PhiKernelSignature()),
runtime_context,
*dev_ctx);
+  } else if (new_op_func_node.phi_kernel_->GetKernelRegisteredType() ==
+             phi::KernelRegisteredType::STRUCTURE) {
+    (*new_op_func_node.phi_kernel_)(&exec_ctx);
} else {
phi::KernelContext phi_kernel_context;
op_with_kernel->BuildPhiKernelContext(
......
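For context, the executor-side hunk above adds a third dispatch path for the kernels this PR migrates. The following is a simplified control-flow sketch, not runnable code: the first branch condition is paraphrased (the diff elides it), and only the STRUCTURE branch is taken verbatim from the hunk.

if (has_phi_kernel_signature) {  // paraphrased, not the actual condition
  // phi function kernel with a signature: run it against the runtime
  // context, as in the context lines above.
} else if (new_op_func_node.phi_kernel_->GetKernelRegisteredType() ==
           phi::KernelRegisteredType::STRUCTURE) {
  // Structure kernel (a fluid OpKernel re-registered in phi): invoked
  // with the fluid ExecutionContext directly; no phi::KernelContext.
  (*new_op_func_node.phi_kernel_)(&exec_ctx);
} else {
  // Ordinary phi function kernel: build a phi::KernelContext explicitly,
  // then run the kernel with it.
  phi::KernelContext phi_kernel_context;
  // op_with_kernel->BuildPhiKernelContext(...); run with the context.
}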
@@ -2468,7 +2468,7 @@ Scope* OperatorWithKernel::PrepareData(
expected_kernel_key.layout(),
expected_kernel_key.dtype());
}
-  } else if (in_def != nullptr &&
+  } else if (in_def != nullptr &&  // KernelRegisteredType is Function
in_def->backend != phi::Backend::ALL_BACKEND) {
auto tensor_backend = phi::TransToPhiBackend(tensor_in->place());
if ((in_def->backend != tensor_backend &&
......
@@ -149,9 +149,15 @@ REGISTER_OPERATOR(
REGISTER_OPERATOR(sequence_softmax_grad,
ops::SequenceSoftmaxGradOp,
ops::SequenceSoftmaxGradOpNoNeedBufferVarsInferer);
-REGISTER_OP_CPU_KERNEL(sequence_softmax,
-                       ops::SequenceSoftmaxKernel<phi::CPUContext, float>,
-                       ops::SequenceSoftmaxKernel<phi::CPUContext, double>);
-REGISTER_OP_CPU_KERNEL(sequence_softmax_grad,
-                       ops::SequenceSoftmaxGradKernel<phi::CPUContext, float>,
-                       ops::SequenceSoftmaxGradKernel<phi::CPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(sequence_softmax,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SequenceSoftmaxKernel,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(sequence_softmax_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SequenceSoftmaxGradKernel,
+                          float,
+                          double) {}
@@ -177,10 +177,15 @@ struct SequenceSoftmaxGradFunctor<phi::GPUContext, T> {
} // namespace paddle
namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(sequence_softmax,
-                        ops::SequenceSoftmaxKernel<phi::GPUContext, float>,
-                        ops::SequenceSoftmaxKernel<phi::GPUContext, double>);
-REGISTER_OP_CUDA_KERNEL(
-    sequence_softmax_grad,
-    ops::SequenceSoftmaxGradKernel<phi::GPUContext, float>,
-    ops::SequenceSoftmaxGradKernel<phi::GPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(sequence_softmax,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SequenceSoftmaxKernel,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(sequence_softmax_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SequenceSoftmaxGradKernel,
+                          float,
+                          double) {}
@@ -86,7 +86,7 @@ struct SequenceSoftmaxGradFunctor<phi::CPUContext, T> {
}
};
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class SequenceSoftmaxKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
@@ -130,7 +130,7 @@ class SequenceSoftmaxKernel : public framework::OpKernel<T> {
}
};
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class SequenceSoftmaxGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......
@@ -137,9 +137,13 @@ REGISTER_OPERATOR(
ops::SequenceTopkAvgPoolGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(sequence_topk_avg_pooling_grad,
ops::SequenceTopkAvgPoolingGradOp);
-REGISTER_OP_CPU_KERNEL(
-    sequence_topk_avg_pooling,
-    ops::SequenceTopkAvgPoolingKernel<phi::CPUContext, float>);
-REGISTER_OP_CPU_KERNEL(
-    sequence_topk_avg_pooling_grad,
-    ops::SequenceTopkAvgPoolingGradKernel<phi::CPUContext, float>);
+PD_REGISTER_STRUCT_KERNEL(sequence_topk_avg_pooling,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SequenceTopkAvgPoolingKernel,
+                          float) {}
+PD_REGISTER_STRUCT_KERNEL(sequence_topk_avg_pooling_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SequenceTopkAvgPoolingGradKernel,
+                          float) {}
@@ -67,7 +67,7 @@ static void get_topk_pos(const T* data, int length, int k, int* pos) {
}
} // namespace details
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class SequenceTopkAvgPoolingKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
@@ -178,7 +178,7 @@ class SequenceTopkAvgPoolingKernel : public framework::OpKernel<T> {
}
};
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class SequenceTopkAvgPoolingGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......
@@ -194,14 +194,19 @@ REGISTER_OPERATOR(sequence_unpad,
REGISTER_OPERATOR(sequence_unpad_grad,
ops::SequenceUnpadGradOp,
ops::SequenceUnpadGradOpNoNeedBufferVarsInferer);
-REGISTER_OP_CPU_KERNEL(sequence_unpad,
-                       ops::SequenceUnpadOpKernel<phi::CPUContext, float>,
-                       ops::SequenceUnpadOpKernel<phi::CPUContext, double>,
-                       ops::SequenceUnpadOpKernel<phi::CPUContext, int>,
-                       ops::SequenceUnpadOpKernel<phi::CPUContext, int64_t>);
-REGISTER_OP_CPU_KERNEL(
-    sequence_unpad_grad,
-    ops::SequenceUnpadGradOpKernel<phi::CPUContext, float>,
-    ops::SequenceUnpadGradOpKernel<phi::CPUContext, double>,
-    ops::SequenceUnpadGradOpKernel<phi::CPUContext, int>,
-    ops::SequenceUnpadGradOpKernel<phi::CPUContext, int64_t>);
+PD_REGISTER_STRUCT_KERNEL(sequence_unpad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SequenceUnpadOpKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t) {}
+PD_REGISTER_STRUCT_KERNEL(sequence_unpad_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SequenceUnpadGradOpKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t) {}
@@ -15,14 +15,19 @@ limitations under the License. */
#include "paddle/fluid/operators/sequence_ops/sequence_unpad_op.h"
namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(sequence_unpad,
-                        ops::SequenceUnpadOpKernel<phi::GPUContext, float>,
-                        ops::SequenceUnpadOpKernel<phi::GPUContext, double>,
-                        ops::SequenceUnpadOpKernel<phi::GPUContext, int>,
-                        ops::SequenceUnpadOpKernel<phi::GPUContext, int64_t>);
-REGISTER_OP_CUDA_KERNEL(
-    sequence_unpad_grad,
-    ops::SequenceUnpadGradOpKernel<phi::GPUContext, float>,
-    ops::SequenceUnpadGradOpKernel<phi::GPUContext, double>,
-    ops::SequenceUnpadGradOpKernel<phi::GPUContext, int>,
-    ops::SequenceUnpadGradOpKernel<phi::GPUContext, int64_t>);
+PD_REGISTER_STRUCT_KERNEL(sequence_unpad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SequenceUnpadOpKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t) {}
+PD_REGISTER_STRUCT_KERNEL(sequence_unpad_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SequenceUnpadGradOpKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t) {}
@@ -27,7 +27,7 @@ namespace operators {
using LoDTensor = phi::DenseTensor;
using LoD = framework::LoD;
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class SequenceUnpadOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -81,7 +81,7 @@ class SequenceUnpadOpKernel : public framework::OpKernel<T> {
}
};
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class SequenceUnpadGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
@@ -18,6 +18,6 @@ limitations under the License. */
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(sequence_unpad,
-                       ops::SequenceUnpadOpKernel<phi::XPUContext, float>);
+                       ops::SequenceUnpadOpKernel<float, phi::XPUContext>);
#endif
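Note that the XPU build keeps the legacy REGISTER_OP_XPU_KERNEL macro in this PR; only the explicit template arguments are reordered to match the kernel class's new <T, DeviceContext> parameter list:

// XPU: legacy macro retained; template arguments swapped to the new order.
REGISTER_OP_XPU_KERNEL(sequence_unpad,
                       ops::SequenceUnpadOpKernel<float, phi::XPUContext>);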
@@ -62,18 +62,22 @@ Return a tensor $Out$ that shares data with the input tensor $X$ and without tensor copy.
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OPERATOR(
share_data,
ops::ShareDataOp,
ops::ShareDataOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OP_CPU_KERNEL(share_data,
-                       ops::ShareDataKernel<bool>,
-                       ops::ShareDataKernel<int>,
-                       ops::ShareDataKernel<int8_t>,
-                       ops::ShareDataKernel<uint8_t>,
-                       ops::ShareDataKernel<paddle::platform::float16>,
-                       ops::ShareDataKernel<int64_t>,
-                       ops::ShareDataKernel<float>,
-                       ops::ShareDataKernel<double>)
+PD_REGISTER_STRUCT_KERNEL(share_data,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::ShareDataKernel,
+                          bool,
+                          int,
+                          int8_t,
+                          uint8_t,
+                          int64_t,
+                          float,
+                          double,
+                          plat::float16) {}
@@ -14,13 +14,17 @@ limitations under the License. */
#include "paddle/fluid/operators/share_data_op.h"
-REGISTER_OP_CUDA_KERNEL(
-    share_data,
-    paddle::operators::ShareDataKernel<bool>,
-    paddle::operators::ShareDataKernel<int>,
-    paddle::operators::ShareDataKernel<int8_t>,
-    paddle::operators::ShareDataKernel<uint8_t>,
-    paddle::operators::ShareDataKernel<paddle::platform::float16>,
-    paddle::operators::ShareDataKernel<int64_t>,
-    paddle::operators::ShareDataKernel<float>,
-    paddle::operators::ShareDataKernel<double>);
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+PD_REGISTER_STRUCT_KERNEL(share_data,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::ShareDataKernel,
+                          bool,
+                          int,
+                          int8_t,
+                          uint8_t,
+                          int64_t,
+                          float,
+                          double,
+                          plat::float16) {}
@@ -18,7 +18,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
-template <typename T>
+template <typename T, typename DeviceContext>
class ShareDataKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......
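ShareDataKernel previously took only a dtype parameter, since sharing data involves no device-specific compute. The hunk above adds a DeviceContext parameter anyway, presumably because PD_REGISTER_STRUCT_KERNEL expects the <T, DeviceContext> shape; the extra parameter is carried but unused. A sketch of the resulting class, with the body paraphrased from the op's description rather than copied from the source:

template <typename T, typename DeviceContext>  // DeviceContext goes unused
class ShareDataKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    // Out shares the input tensor's buffer: no copy and no device work,
    // so this kernel never touches DeviceContext.
  }
};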
@@ -159,14 +159,19 @@ REGISTER_OPERATOR(shuffle_batch,
ops::ShuffleBatchGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(shuffle_batch_grad, ops::ShuffleBatchOpGrad);
-REGISTER_OP_CPU_KERNEL(shuffle_batch,
-                       ops::ShuffleBatchKernel<float>,
-                       ops::ShuffleBatchKernel<double>,
-                       ops::ShuffleBatchKernel<int32_t>,
-                       ops::ShuffleBatchKernel<int64_t>);
-REGISTER_OP_CPU_KERNEL(shuffle_batch_grad,
-                       ops::ShuffleBatchGradKernel<float>,
-                       ops::ShuffleBatchGradKernel<double>,
-                       ops::ShuffleBatchGradKernel<int32_t>,
-                       ops::ShuffleBatchGradKernel<int64_t>);
+PD_REGISTER_STRUCT_KERNEL(shuffle_batch,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleBatchKernel,
+                          float,
+                          double,
+                          int32_t,
+                          int64_t) {}
+PD_REGISTER_STRUCT_KERNEL(shuffle_batch_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleBatchGradKernel,
+                          float,
+                          double,
+                          int32_t,
+                          int64_t) {}
@@ -79,7 +79,7 @@ struct ReorderFunctor {
int64_t stride_;
};
-template <typename T>
+template <typename T, typename DeviceContext>
class ShuffleBatchCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
@@ -149,7 +149,7 @@ class ShuffleBatchCUDAKernel : public framework::OpKernel<T> {
}
};
-template <typename T>
+template <typename T, typename DeviceContext>
class ShuffleBatchGradCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
@@ -180,15 +180,21 @@ class ShuffleBatchGradCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(shuffle_batch,
-                        ops::ShuffleBatchCUDAKernel<float>,
-                        ops::ShuffleBatchCUDAKernel<double>,
-                        ops::ShuffleBatchCUDAKernel<int32_t>,
-                        ops::ShuffleBatchCUDAKernel<int64_t>);
-REGISTER_OP_CUDA_KERNEL(shuffle_batch_grad,
-                        ops::ShuffleBatchGradCUDAKernel<float>,
-                        ops::ShuffleBatchGradCUDAKernel<double>,
-                        ops::ShuffleBatchGradCUDAKernel<int32_t>,
-                        ops::ShuffleBatchGradCUDAKernel<int64_t>);
+PD_REGISTER_STRUCT_KERNEL(shuffle_batch,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleBatchCUDAKernel,
+                          float,
+                          double,
+                          int32_t,
+                          int64_t) {}
+PD_REGISTER_STRUCT_KERNEL(shuffle_batch_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleBatchGradCUDAKernel,
+                          float,
+                          double,
+                          int32_t,
+                          int64_t) {}
#endif
@@ -36,7 +36,7 @@ namespace operators {
template <typename T>
using Vector = phi::Vector<T>;
-template <typename T>
+template <typename T, typename DeviceContext>
class ShuffleBatchKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
@@ -122,7 +122,7 @@ class ShuffleBatchKernel : public framework::OpKernel<T> {
}
};
-template <typename T>
+template <typename T, typename DeviceContext>
class ShuffleBatchGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
......
@@ -123,11 +123,15 @@ REGISTER_OPERATOR(shuffle_channel,
REGISTER_OPERATOR(shuffle_channel_grad, ops::ShuffleChannelGradOp);
-REGISTER_OP_CPU_KERNEL(shuffle_channel,
-                       ops::ShuffleChannelOpKernel<phi::CPUContext, float>,
-                       ops::ShuffleChannelOpKernel<phi::CPUContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    shuffle_channel_grad,
-    ops::ShuffleChannelGradOpKernel<phi::CPUContext, float>,
-    ops::ShuffleChannelGradOpKernel<phi::CPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(shuffle_channel,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleChannelOpKernel,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(shuffle_channel_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleChannelGradOpKernel,
+                          float,
+                          double) {}
@@ -43,7 +43,7 @@ __global__ void ShuffleChannel(const int nthreads,
p_o[k] = input[index];
}
}
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class ShuffleChannelOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -82,7 +82,7 @@ class ShuffleChannelOpCUDAKernel : public framework::OpKernel<T> {
}
};
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class ShuffleChannelGradOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -126,11 +126,15 @@ class ShuffleChannelGradOpCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    shuffle_channel,
-    ops::ShuffleChannelOpCUDAKernel<phi::GPUContext, float>,
-    ops::ShuffleChannelOpCUDAKernel<phi::GPUContext, double>);
-REGISTER_OP_CUDA_KERNEL(
-    shuffle_channel_grad,
-    ops::ShuffleChannelGradOpCUDAKernel<phi::GPUContext, float>,
-    ops::ShuffleChannelGradOpCUDAKernel<phi::GPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(shuffle_channel,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleChannelOpCUDAKernel,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(shuffle_channel_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleChannelGradOpCUDAKernel,
+                          float,
+                          double) {}
@@ -19,7 +19,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class ShuffleChannelOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -54,7 +54,7 @@ class ShuffleChannelOpKernel : public framework::OpKernel<T> {
}
};
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class ShuffleChannelGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......