From d9edb233a84db480bc8ecdfcb623423a1d47c5e7 Mon Sep 17 00:00:00 2001
From: huangjiyi <43315610+huangjiyi@users.noreply.github.com>
Date: Wed, 19 Apr 2023 10:42:43 +0800
Subject: [PATCH] Register fluid kernels to phi [part 13] (#53037)

* update

* fix bug

* update

* fix bug
---
 .../fluid/operators/fused/yolo_box_head_op.cu |  5 +-
 .../fluid/operators/fused/yolo_box_post_op.cu |  5 +-
 .../uniform_random_batch_size_like_op.cc      | 24 ++++++----
 .../uniform_random_batch_size_like_op.cu      | 12 +++--
 paddle/fluid/operators/unzip_op.cc            | 11 ++---
 paddle/fluid/operators/unzip_op.cu            | 47 +++++++++----------
 paddle/fluid/operators/unzip_op.h             |  4 +-
 7 files changed, 57 insertions(+), 51 deletions(-)

diff --git a/paddle/fluid/operators/fused/yolo_box_head_op.cu b/paddle/fluid/operators/fused/yolo_box_head_op.cu
index 88d589f85b0..abb7b5aeaae 100644
--- a/paddle/fluid/operators/fused/yolo_box_head_op.cu
+++ b/paddle/fluid/operators/fused/yolo_box_head_op.cu
@@ -63,7 +63,7 @@ __global__ void YoloBoxHeadCudaKernel(const T* input,
   }
 }
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class YoloBoxHeadKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -103,4 +103,5 @@ class YoloBoxHeadKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(yolo_box_head, ops::YoloBoxHeadKernel<float>);
+PD_REGISTER_STRUCT_KERNEL(
+    yolo_box_head, GPU, ALL_LAYOUT, ops::YoloBoxHeadKernel, float) {}
diff --git a/paddle/fluid/operators/fused/yolo_box_post_op.cu b/paddle/fluid/operators/fused/yolo_box_post_op.cu
index fc01d7027f3..72bb97a2aae 100644
--- a/paddle/fluid/operators/fused/yolo_box_post_op.cu
+++ b/paddle/fluid/operators/fused/yolo_box_post_op.cu
@@ -315,7 +315,7 @@ static void YoloTensorParseCuda(
       prob_thresh);
 }
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class YoloBoxPostKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -555,4 +555,5 @@ class YoloBoxPostKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(yolo_box_post, ops::YoloBoxPostKernel<float>);
+PD_REGISTER_STRUCT_KERNEL(
+    yolo_box_post, GPU, ALL_LAYOUT, ops::YoloBoxPostKernel, float) {}
diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc
index bdbef1a7eee..8acaad0f60e 100644
--- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc
+++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc
@@ -56,7 +56,7 @@ inline void UniformRealDistribution(paddle::platform::bfloat16 *data,
 // It seems that Eigen::Tensor::random in GPU will SEGFAULT.
 // Use std::random and thrust::random(thrust is a std library in CUDA) to
 // implement uniform random.
-template <typename T>
+template <typename T, typename DeviceContext>
 class CPUUniformRandomKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
@@ -178,16 +178,20 @@ with random values sampled from a uniform distribution.
 }  // namespace operators
 }  // namespace paddle
 
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
 REGISTER_OPERATOR(
     uniform_random_batch_size_like,
-    paddle::operators::UniformRandomBatchSizeLikeOp,
-    paddle::operators::UniformRandomBatchSizeLikeOpMaker,
+    ops::UniformRandomBatchSizeLikeOp,
+    ops::UniformRandomBatchSizeLikeOpMaker,
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
-    paddle::operators::BatchSizeLikeNoNeedBufferVarsInferer);
-
-REGISTER_OP_CPU_KERNEL(
-    uniform_random_batch_size_like,
-    paddle::operators::CPUUniformRandomKernel<float>,
-    paddle::operators::CPUUniformRandomKernel<double>,
-    paddle::operators::CPUUniformRandomKernel<paddle::platform::bfloat16>);
+    ops::BatchSizeLikeNoNeedBufferVarsInferer);
+
+PD_REGISTER_STRUCT_KERNEL(uniform_random_batch_size_like,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::CPUUniformRandomKernel,
+                          float,
+                          double,
+                          plat::bfloat16) {}
diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cu b/paddle/fluid/operators/uniform_random_batch_size_like_op.cu
index 4c60cb76fb9..1bbd6eba3c6 100644
--- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cu
+++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cu
@@ -16,7 +16,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class GPUUniformRandomKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -59,6 +59,10 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OP_CUDA_KERNEL(uniform_random_batch_size_like,
-                        paddle::operators::GPUUniformRandomKernel<float>,
-                        paddle::operators::GPUUniformRandomKernel<double>);
+namespace ops = paddle::operators;
+PD_REGISTER_STRUCT_KERNEL(uniform_random_batch_size_like,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::GPUUniformRandomKernel,
+                          float,
+                          double) {}
diff --git a/paddle/fluid/operators/unzip_op.cc b/paddle/fluid/operators/unzip_op.cc
index ffb46c2f4b5..556b345c17b 100644
--- a/paddle/fluid/operators/unzip_op.cc
+++ b/paddle/fluid/operators/unzip_op.cc
@@ -162,10 +162,7 @@ REGISTER_OPERATOR(unzip,
 
 REGISTER_OPERATOR(unzip_grad, ops::unzipGradientOp);
 
-REGISTER_OP_CPU_KERNEL(unzip,
-                       ops::unzipOpKernel,
-                       ops::unzipOpKernel);
-
-REGISTER_OP_CPU_KERNEL(unzip_grad,
-                       ops::unzipGradOpKernel,
-                       ops::unzipGradOpKernel);
+PD_REGISTER_STRUCT_KERNEL(unzip, CPU, ALL_LAYOUT, ops::unzipOpKernel, int64_t) {
+}
+PD_REGISTER_STRUCT_KERNEL(
+    unzip_grad, CPU, ALL_LAYOUT, ops::unzipGradOpKernel, int64_t) {}
diff --git a/paddle/fluid/operators/unzip_op.cu b/paddle/fluid/operators/unzip_op.cu
index 0605ce4ab91..d60af556cd2 100644
--- a/paddle/fluid/operators/unzip_op.cu
+++ b/paddle/fluid/operators/unzip_op.cu
@@ -42,7 +42,7 @@ __global__ void unzipKernel(
   }
 }
 
-template
+template <typename T, typename DeviceContext>
 class unzipCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -69,7 +69,7 @@ class unzipCUDAKernel : public framework::OpKernel<T> {
   }
 };
 
-template
+template <typename T, typename DeviceContext>
 class unzipGradCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -81,25 +81,24 @@ class unzipGradCUDAKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    unzip,
-    ops::unzipCUDAKernel,
-    ops::unzipCUDAKernel,
-    ops::unzipCUDAKernel,
-    ops::unzipCUDAKernel,
-    ops::unzipCUDAKernel,
-    ops::unzipCUDAKernel,
-    ops::unzipCUDAKernel,
-    ops::unzipCUDAKernel,
-    ops::unzipCUDAKernel,
-    ops::unzipCUDAKernel,
-    ops::unzipCUDAKernel,
-    ops::unzipCUDAKernel);
-
-REGISTER_OP_CUDA_KERNEL(unzip_grad,
-                        ops::unzipGradCUDAKernel,
-                        ops::unzipGradCUDAKernel,
-                        ops::unzipGradCUDAKernel,
-                        ops::unzipGradCUDAKernel,
-                        ops::unzipGradCUDAKernel,
-                        ops::unzipGradCUDAKernel);
+namespace plat = paddle::platform;
+PD_REGISTER_STRUCT_KERNEL(unzip,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::unzipCUDAKernel,
+                          float,
+                          double,
+                          plat::float16,
+                          bool,
+                          int,
+                          int64_t) {}
+PD_REGISTER_STRUCT_KERNEL(unzip_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::unzipGradCUDAKernel,
+                          float,
+                          double,
+                          plat::float16,
+                          bool,
+                          int,
+                          int64_t) {}
diff --git a/paddle/fluid/operators/unzip_op.h b/paddle/fluid/operators/unzip_op.h
index f177f69476f..6829d00dccf 100644
--- a/paddle/fluid/operators/unzip_op.h
+++ b/paddle/fluid/operators/unzip_op.h
@@ -19,7 +19,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template
+template <typename T, typename DeviceContext>
 class unzipOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -27,7 +27,7 @@ class unzipOpKernel : public framework::OpKernel<T> {
   }
 };
 
-template
+template <typename T, typename DeviceContext>
 class unzipGradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
--
GitLab
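
Editor's note: the sketch below contrasts the two registration styles this patch migrates between. It is an illustrative sketch only, not code from the patch: "my_op" and MyOpKernel are hypothetical names, and the headers, macros, and base classes are assumed to be available from the same Paddle tree these diffs modify.

// Illustrative sketch (assumptions: op_registry.h exposes OpKernel and the
// registration macros used throughout this patch; my_op / MyOpKernel are
// made-up names used only to show the pattern).
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

// phi-style struct kernels are templated as <T, DeviceContext>, in that order,
// which is the signature PD_REGISTER_STRUCT_KERNEL expects; the "+template"
// lines in the diffs above switch the kernel classes to this form.
template <typename T, typename DeviceContext>
class MyOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // ... kernel body elided ...
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

// Old fluid-style registration (the form removed in this patch): one fully
// instantiated kernel type is listed per data type.
//
//   REGISTER_OP_CUDA_KERNEL(my_op,
//                           ops::MyOpKernel<float>,
//                           ops::MyOpKernel<double>);
//
// New phi-style registration (the form added in this patch): the backend
// (CPU/GPU) and layout are named explicitly, the kernel class is given once,
// the data types are listed, and the macro is followed by a (here empty) body.
PD_REGISTER_STRUCT_KERNEL(
    my_op, GPU, ALL_LAYOUT, ops::MyOpKernel, float, double) {}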