Unverified commit abc44b40, authored by H huangjiyi, committed by GitHub

Register fluid kernels to phi [part 11] (#53035)

* update

* fix bug

* fix bug

* fix bug

* fix bug
Parent: 01de2fc9
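Every change below applies one pattern. The fluid-era macros REGISTER_OP_CPU_KERNEL / REGISTER_OP_CUDA_KERNEL list one explicit template instantiation per data type (e.g. ops::SomeKernel<phi::CPUContext, float>). The struct-kernel macro PD_REGISTER_STRUCT_KERNEL used for the fluid-to-phi migration instead takes the op name, a backend (CPU or GPU), a layout (ALL_LAYOUT), the kernel class template itself, and the data type list, and it instantiates the kernel with the data type first; accordingly, every kernel class touched here swaps its template parameters from <typename DeviceContext, typename T> to <typename T, typename DeviceContext>. A minimal before/after sketch of the shape, using a hypothetical foo operator that is not part of this commit (only the macro spellings are taken from the diff):

// Before: fluid-style registration. DeviceContext comes first in the kernel
// template and every dtype is spelled out as a full instantiation.
template <typename DeviceContext, typename T>
class FooKernel : public framework::OpKernel<T> { /* ... */ };

REGISTER_OP_CPU_KERNEL(foo,
                       ops::FooKernel<phi::CPUContext, float>,
                       ops::FooKernel<phi::CPUContext, double>);

// After: struct-kernel registration. T comes first, and the macro receives
// the class template plus backend / layout / dtype list. Every registration
// in this commit leaves the trailing brace body empty.
template <typename T, typename DeviceContext>
class FooKernel : public framework::OpKernel<T> { /* ... */ };

PD_REGISTER_STRUCT_KERNEL(
    foo, CPU, ALL_LAYOUT, ops::FooKernel, float, double) {}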
@@ -262,10 +262,15 @@ REGISTER_OPERATOR(sigmoid_focal_loss,
     ops::SigmoidFocalLossGradOpMaker<paddle::framework::OpDesc>,
     ops::SigmoidFocalLossGradOpMaker<paddle::imperative::OpBase>);
 REGISTER_OPERATOR(sigmoid_focal_loss_grad, ops::SigmoidFocalLossGradOp);
-REGISTER_OP_CPU_KERNEL(sigmoid_focal_loss,
-                       ops::SigmoidFocalLossKernel<phi::CPUContext, float>,
-                       ops::SigmoidFocalLossKernel<phi::CPUContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    sigmoid_focal_loss_grad,
-    ops::SigmoidFocalLossGradKernel<phi::CPUContext, float>,
-    ops::SigmoidFocalLossGradKernel<phi::CPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(sigmoid_focal_loss,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SigmoidFocalLossKernel,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(sigmoid_focal_loss_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SigmoidFocalLossGradKernel,
+                          float,
+                          double) {}
@@ -117,7 +117,7 @@ __global__ void GPUSigmoidFocalLossBackward(const T *x_data,
   }
 }
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class GPUSigmoidFocalLossKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
@@ -148,7 +148,7 @@ class GPUSigmoidFocalLossKernel : public framework::OpKernel<T> {
   }
 };
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class GPUSigmoidFocalLossGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
@@ -187,11 +187,15 @@ class GPUSigmoidFocalLossGradKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    sigmoid_focal_loss,
-    ops::GPUSigmoidFocalLossKernel<phi::GPUContext, float>,
-    ops::GPUSigmoidFocalLossKernel<phi::GPUContext, double>);
-REGISTER_OP_CUDA_KERNEL(
-    sigmoid_focal_loss_grad,
-    ops::GPUSigmoidFocalLossGradKernel<phi::GPUContext, float>,
-    ops::GPUSigmoidFocalLossGradKernel<phi::GPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(sigmoid_focal_loss,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::GPUSigmoidFocalLossKernel,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(sigmoid_focal_loss_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::GPUSigmoidFocalLossGradKernel,
+                          float,
+                          double) {}
@@ -22,7 +22,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class SigmoidFocalLossKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
@@ -73,7 +73,7 @@ class SigmoidFocalLossKernel : public framework::OpKernel<T> {
   }
 };
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class SigmoidFocalLossGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
@@ -25,7 +25,7 @@
 namespace paddle {
 namespace operators {
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class SkipLayerNormKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
@@ -89,13 +89,16 @@ class SkipLayerNormKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 namespace ops = paddle::operators;
+namespace plat = paddle::platform;
 
 #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 10000
-REGISTER_OP_CUDA_KERNEL(
-    skip_layernorm,
-    ops::SkipLayerNormKernel<phi::GPUContext, float>,
-    ops::SkipLayerNormKernel<phi::GPUContext, paddle::platform::float16>);
+PD_REGISTER_STRUCT_KERNEL(skip_layernorm,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SkipLayerNormKernel,
+                          float,
+                          plat::float16) {}
 #else
-REGISTER_OP_CUDA_KERNEL(skip_layernorm,
-                        ops::SkipLayerNormKernel<phi::GPUContext, float>);
+PD_REGISTER_STRUCT_KERNEL(
+    skip_layernorm, GPU, ALL_LAYOUT, ops::SkipLayerNormKernel, float) {}
 #endif
@@ -118,6 +118,9 @@ REGISTER_OPERATOR(
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
     ops::SparseMomentumOpInferVarType);
-REGISTER_OP_CPU_KERNEL(sparse_momentum,
-                       ops::SparseMomentumOpKernel<phi::CPUContext, float>,
-                       ops::SparseMomentumOpKernel<phi::CPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(sparse_momentum,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SparseMomentumOpKernel,
+                          float,
+                          double) {}
@@ -17,8 +17,11 @@
 #include "paddle/fluid/platform/float16.h"
 
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    sparse_momentum,
-    ops::SparseMomentumOpKernel<phi::GPUContext, float>,
-    ops::SparseMomentumOpKernel<phi::GPUContext, double>,
-    ops::SparseMomentumOpKernel<phi::GPUContext, paddle::platform::float16>);
+namespace plat = paddle::platform;
+PD_REGISTER_STRUCT_KERNEL(sparse_momentum,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SparseMomentumOpKernel,
+                          float,
+                          double,
+                          plat::float16) {}
@@ -295,7 +295,7 @@ class IndexMomentumFunctor {
   }
 };
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class SparseMomentumOpKernel : public framework::OpKernel<T> {
   using MPDType = MultiPrecisionType<T>;
@@ -91,6 +91,9 @@ REGISTER_OPERATOR(
     ops::SimilarityFocusOpMaker,
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OP_CPU_KERNEL(similarity_focus,
-                       ops::SimilarityFocusKernel<float>,
-                       ops::SimilarityFocusKernel<double>);
+PD_REGISTER_STRUCT_KERNEL(similarity_focus,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SimilarityFocusKernel,
+                          float,
+                          double) {}
@@ -25,7 +25,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class SimilarityFocusKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
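Worth noting: SimilarityFocusKernel previously templated on T alone. This change adds a DeviceContext parameter that the existing CPU-only body does not use, presumably so the class matches the two-parameter shape that PD_REGISTER_STRUCT_KERNEL instantiates. A sketch of the resulting declaration (body elided):

// DeviceContext is carried only to satisfy the struct-kernel registration
// shown in the previous hunk; the Compute body is unchanged by this commit.
template <typename T, typename DeviceContext>
class SimilarityFocusKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override;
};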
@@ -225,7 +225,7 @@ REGISTER_OPERATOR(smooth_l1_loss,
     ops::SmoothL1LossGradMaker<paddle::framework::OpDesc>,
     ops::SmoothL1LossGradMaker<paddle::imperative::OpBase>);
 REGISTER_OPERATOR(smooth_l1_loss_grad, ops::SmoothL1LossGradOp);
-REGISTER_OP_CPU_KERNEL(smooth_l1_loss,
-                       ops::SmoothL1LossKernel<phi::CPUContext, float>);
-REGISTER_OP_CPU_KERNEL(smooth_l1_loss_grad,
-                       ops::SmoothL1LossGradKernel<phi::CPUContext, float>);
+PD_REGISTER_STRUCT_KERNEL(
+    smooth_l1_loss, CPU, ALL_LAYOUT, ops::SmoothL1LossKernel, float) {}
+PD_REGISTER_STRUCT_KERNEL(
+    smooth_l1_loss_grad, CPU, ALL_LAYOUT, ops::SmoothL1LossGradKernel, float) {}
@@ -14,7 +14,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/smooth_l1_loss_op.h"
 
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(smooth_l1_loss,
-                        ops::SmoothL1LossKernel<phi::GPUContext, float>);
-REGISTER_OP_CUDA_KERNEL(smooth_l1_loss_grad,
-                        ops::SmoothL1LossGradKernel<phi::GPUContext, float>);
+PD_REGISTER_STRUCT_KERNEL(
+    smooth_l1_loss, GPU, ALL_LAYOUT, ops::SmoothL1LossKernel, float) {}
+PD_REGISTER_STRUCT_KERNEL(
+    smooth_l1_loss_grad, GPU, ALL_LAYOUT, ops::SmoothL1LossGradKernel, float) {}
@@ -45,7 +45,7 @@ struct SmoothL1LossForward {
   T sigma2;
 };
 
-template <typename DeviceContext, typename T, typename AttrType = T>
+template <typename T, typename DeviceContext, typename AttrType = T>
 class SmoothL1LossKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -116,7 +116,7 @@ struct SmoothL1LossBackward {
   T sigma2;
 };
 
-template <typename DeviceContext, typename T, typename AttrType = T>
+template <typename T, typename DeviceContext, typename AttrType = T>
 class SmoothL1LossGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
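SmoothL1LossKernel and SmoothL1LossGradKernel are the only kernels in this commit with a third, defaulted template parameter. Only T and DeviceContext swap places; AttrType = T stays last, since a defaulted class-template parameter may not precede non-defaulted ones. Sketch of the new parameter order (body elided):

// T moves to the front for the struct-kernel macros, DeviceContext follows,
// and the defaulted AttrType keeps its trailing position.
template <typename T, typename DeviceContext, typename AttrType = T>
class SmoothL1LossKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override;
};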
@@ -224,13 +224,19 @@ REGISTER_OPERATOR(space_to_depth,
 REGISTER_OPERATOR(space_to_depth_grad,
                   ops::SpaceToDepthGradOp,
                   ops::SpaceToDepthGradOpNoBufferVarsInferer);
-REGISTER_OP_CPU_KERNEL(space_to_depth,
-                       ops::SpaceToDepthKernel<phi::CPUContext, float>,
-                       ops::SpaceToDepthKernel<phi::CPUContext, double>,
-                       ops::SpaceToDepthKernel<phi::CPUContext, int>,
-                       ops::SpaceToDepthKernel<phi::CPUContext, int64_t>);
-REGISTER_OP_CPU_KERNEL(space_to_depth_grad,
-                       ops::SpaceToDepthGradKernel<phi::CPUContext, float>,
-                       ops::SpaceToDepthGradKernel<phi::CPUContext, double>,
-                       ops::SpaceToDepthGradKernel<phi::CPUContext, int>,
-                       ops::SpaceToDepthGradKernel<phi::CPUContext, int64_t>);
+PD_REGISTER_STRUCT_KERNEL(space_to_depth,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SpaceToDepthKernel,
+                          int,
+                          int64_t,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(space_to_depth_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SpaceToDepthGradKernel,
+                          int,
+                          int64_t,
+                          float,
+                          double) {}
@@ -17,14 +17,19 @@
 namespace plat = paddle::platform;
 namespace ops = paddle::operators;
 
-REGISTER_OP_CUDA_KERNEL(space_to_depth,
-                        ops::SpaceToDepthKernel<phi::GPUContext, float>,
-                        ops::SpaceToDepthKernel<phi::GPUContext, double>,
-                        ops::SpaceToDepthKernel<phi::GPUContext, int>,
-                        ops::SpaceToDepthKernel<phi::GPUContext, int64_t>);
-
-REGISTER_OP_CUDA_KERNEL(space_to_depth_grad,
-                        ops::SpaceToDepthGradKernel<phi::GPUContext, float>,
-                        ops::SpaceToDepthGradKernel<phi::GPUContext, double>,
-                        ops::SpaceToDepthGradKernel<phi::GPUContext, int>,
-                        ops::SpaceToDepthGradKernel<phi::GPUContext, int64_t>);
+PD_REGISTER_STRUCT_KERNEL(space_to_depth,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SpaceToDepthKernel,
+                          int,
+                          int64_t,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(space_to_depth_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SpaceToDepthGradKernel,
+                          int,
+                          int64_t,
+                          float,
+                          double) {}
@@ -67,7 +67,7 @@ class space_to_depth_compute {
   T *out_;
 };
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class SpaceToDepthKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
@@ -96,7 +96,7 @@ class SpaceToDepthKernel : public framework::OpKernel<T> {
   }
 };
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class SpaceToDepthGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
@@ -653,7 +653,7 @@ std::vector<phi::DenseTensor> GetSplitTensor(phi::DenseTensor* input) {
   return input->Split(1, 0);
 }
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class SparseAttentionCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -773,7 +773,7 @@ class SparseAttentionCUDAKernel : public framework::OpKernel<T> {
   }
 };
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class SparseAttentionGradCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -890,12 +890,17 @@ class SparseAttentionGradCUDAKernel : public framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OP_CUDA_KERNEL(
-    sparse_attention,
-    ops::SparseAttentionCUDAKernel<phi::GPUContext, float>,
-    ops::SparseAttentionCUDAKernel<phi::GPUContext, double>);
-
-REGISTER_OP_CUDA_KERNEL(
-    sparse_attention_grad,
-    ops::SparseAttentionGradCUDAKernel<phi::GPUContext, float>,
-    ops::SparseAttentionGradCUDAKernel<phi::GPUContext, double>);
+namespace ops = paddle::operators;
+PD_REGISTER_STRUCT_KERNEL(sparse_attention,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SparseAttentionCUDAKernel,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(sparse_attention_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SparseAttentionGradCUDAKernel,
+                          float,
+                          double) {}
@@ -109,9 +109,8 @@ REGISTER_OPERATOR(
     paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
     paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>);
 REGISTER_OPERATOR(spp_grad, ops::SppOpGrad);
-REGISTER_OP_CPU_KERNEL(spp,
-                       ops::SppKernel<phi::CPUContext, float>,
-                       ops::SppKernel<phi::CPUContext, double>);
-REGISTER_OP_CPU_KERNEL(spp_grad,
-                       ops::SppGradKernel<phi::CPUContext, float>,
-                       ops::SppGradKernel<phi::CPUContext, double>);
+
+PD_REGISTER_STRUCT_KERNEL(spp, CPU, ALL_LAYOUT, ops::SppKernel, float, double) {
+}
+PD_REGISTER_STRUCT_KERNEL(
+    spp_grad, CPU, ALL_LAYOUT, ops::SppGradKernel, float, double) {}
@@ -15,9 +15,8 @@ limitations under the License. */
 #include "paddle/fluid/operators/spp_op.h"
 
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(spp,
-                        ops::SppKernel<phi::GPUContext, float>,
-                        ops::SppKernel<phi::GPUContext, double>);
-REGISTER_OP_CUDA_KERNEL(spp_grad,
-                        ops::SppGradKernel<phi::GPUContext, float>,
-                        ops::SppGradKernel<phi::GPUContext, double>);
+
+PD_REGISTER_STRUCT_KERNEL(spp, GPU, ALL_LAYOUT, ops::SppKernel, float, double) {
+}
+PD_REGISTER_STRUCT_KERNEL(
+    spp_grad, GPU, ALL_LAYOUT, ops::SppGradKernel, float, double) {}
@@ -24,7 +24,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class SppKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -107,7 +107,7 @@ class SppKernel : public framework::OpKernel<T> {
     }
   }
 };
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class SppGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {