From 22f84122411d738bba344bdf3124e414126aa921 Mon Sep 17 00:00:00 2001
From: Leo Chen <chenqiuliang@baidu.com>
Date: Fri, 25 Feb 2022 10:55:14 +0800
Subject: [PATCH] [phi] refine code of randint, randperm, unbind kernel
 (#39909)

* refine randint kernel

* refine randperm kernel

* refine unbind kernel

* support op seed
---
 paddle/phi/kernels/cpu/randint_kernel.cc      | 17 ++++----
 paddle/phi/kernels/cpu/randperm_kernel.cc     | 38 +++++++++++++----
 paddle/phi/kernels/cpu/unbind_kernel.cc       |  1 +
 .../kernels/funcs/concat_and_split_functor.cc | 15 -------
 .../kernels/funcs/concat_and_split_functor.cu | 18 ++------
 .../kernels/funcs/concat_and_split_functor.h  | 16 ++++----
 paddle/phi/kernels/gpu/randint_kernel.cu      | 21 +++++-----
 paddle/phi/kernels/gpu/randperm_kernel.cu     | 41 ++++++++++++++-----
 paddle/phi/kernels/gpu/unbind_kernel.cu       |  3 +-
 paddle/phi/kernels/impl/unbind_kernel_impl.h  |  6 +--
 paddle/phi/kernels/randint_kernel.h           |  4 +-
 paddle/phi/kernels/randperm_kernel.h          |  6 ++-
 paddle/phi/ops/compat/randperm_sig.cc         |  7 +++-
 13 files changed, 108 insertions(+), 85 deletions(-)
diff --git a/paddle/phi/kernels/cpu/randint_kernel.cc b/paddle/phi/kernels/cpu/randint_kernel.cc
index 5fe56b57452..feb418949ba 100644
--- a/paddle/phi/kernels/cpu/randint_kernel.cc
+++ b/paddle/phi/kernels/cpu/randint_kernel.cc
@@ -22,42 +22,43 @@
 namespace phi {
 
 template <typename T, typename Context>
-void RandintRawKernel(const Context& ctx,
+void RandintRawKernel(const Context& dev_ctx,
                       int low,
                       int high,
                       const ScalarArray& shape,
                       DataType dtype,
                       int seed,
                       DenseTensor* out) {
-  out->ResizeAndAllocate(phi::make_ddim(shape.GetData()));
-  auto size = out->numel();
+  out->Resize(phi::make_ddim(shape.GetData()));
+  T* data = dev_ctx.template Alloc<T>(out);
+  auto numel = out->numel();
   std::shared_ptr<std::mt19937_64> engine;
   if (seed) {
     engine = std::make_shared<std::mt19937_64>();
     engine->seed(seed);
   } else {
-    engine = ctx.GetGenerator()->GetCPUEngine();
+    engine = dev_ctx.GetGenerator()->GetCPUEngine();
   }
   std::uniform_int_distribution<T> dist(low, high - 1);
-  auto data = out->data<T>();
-  for (int64_t i = 0; i < size; ++i) {
+  for (int64_t i = 0; i < numel; ++i) {
     data[i] = dist(*engine);
   }
 }
 
 template <typename T, typename Context>
-void RandintKernel(const Context& ctx,
+void RandintKernel(const Context& dev_ctx,
                    int low,
                    int high,
                    const ScalarArray& shape,
                    DataType dtype,
                    DenseTensor* out) {
-  RandintRawKernel<T>(ctx, low, high, shape, dtype, 0, out);
+  RandintRawKernel<T>(dev_ctx, low, high, shape, dtype, 0, out);
 }
 
 }  // namespace phi
 
 PD_REGISTER_KERNEL(
     randint_raw, CPU, ALL_LAYOUT, phi::RandintRawKernel, int, int64_t) {}
+
 PD_REGISTER_KERNEL(randint, CPU, ALL_LAYOUT, phi::RandintKernel, int, int64_t) {
 }
diff --git a/paddle/phi/kernels/cpu/randperm_kernel.cc b/paddle/phi/kernels/cpu/randperm_kernel.cc
index 28092c8df6d..6cb435f53b8 100644
--- a/paddle/phi/kernels/cpu/randperm_kernel.cc
+++ b/paddle/phi/kernels/cpu/randperm_kernel.cc
@@ -13,20 +13,23 @@
 // limitations under the License.
 
 #include "paddle/phi/kernels/randperm_kernel.h"
-#include "paddle/phi/core/dense_tensor.h"
-#include "paddle/phi/core/device_context.h"
+
 #include "paddle/phi/core/kernel_registry.h"
 
 namespace phi {
 
 template <typename T, typename Context>
-void RandpermKernel(const Context& ctx,
-                    int n,
-                    DataType dtype,
-                    DenseTensor* out) {
-  T* out_data = ctx.template Alloc<T>(out);
-  auto gen_ptr = ctx.GetHostGenerator();
-  auto engine = gen_ptr->GetCPUEngine();
+void RandpermRawKernel(
+    const Context& dev_ctx, int n, DataType dtype, int seed, DenseTensor* out) {
+  T* out_data = dev_ctx.template Alloc<T>(out);
+
+  std::shared_ptr<std::mt19937_64> engine;
+  if (seed) {
+    engine = std::make_shared<std::mt19937_64>();
+    engine->seed(seed);
+  } else {
+    engine = dev_ctx.GetGenerator()->GetCPUEngine();
+  }
 
   for (int i = 0; i < n; ++i) {
     out_data[i] = static_cast<T>(i);
@@ -34,8 +37,25 @@ void RandpermKernel(const Context& ctx,
   std::shuffle(out_data, out_data + n, *engine);
 }
 
+template <typename T, typename Context>
+void RandpermKernel(const Context& dev_ctx,
+                    int n,
+                    DataType dtype,
+                    DenseTensor* out) {
+  RandpermRawKernel<T>(dev_ctx, n, dtype, 0, out);
+}
+
 }  // namespace phi
 
+PD_REGISTER_KERNEL(randperm_raw,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::RandpermRawKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t) {}
+
 PD_REGISTER_KERNEL(randperm,
                    CPU,
                    ALL_LAYOUT,
diff --git a/paddle/phi/kernels/cpu/unbind_kernel.cc b/paddle/phi/kernels/cpu/unbind_kernel.cc
index 655f8c8aafb..39cc2f8fc46 100644
--- a/paddle/phi/kernels/cpu/unbind_kernel.cc
+++ b/paddle/phi/kernels/cpu/unbind_kernel.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/phi/kernels/unbind_kernel.h"
+
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/unbind_kernel_impl.h"
 
diff --git a/paddle/phi/kernels/funcs/concat_and_split_functor.cc b/paddle/phi/kernels/funcs/concat_and_split_functor.cc
index c8405703a5c..aa73ba5f689 100644
--- a/paddle/phi/kernels/funcs/concat_and_split_functor.cc
+++ b/paddle/phi/kernels/funcs/concat_and_split_functor.cc
@@ -12,21 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <cmath>
-#include <memory>
-#include <vector>
-
-#include "paddle/fluid/framework/convert_utils.h"
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/phi/core/utils/data_type.h"
-
-#include "paddle/phi/backends/cpu/cpu_context.h"
-#include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
 
 namespace phi {
diff --git a/paddle/phi/kernels/funcs/concat_and_split_functor.cu b/paddle/phi/kernels/funcs/concat_and_split_functor.cu
index 2abfdb606e7..840c8872f50 100644
--- a/paddle/phi/kernels/funcs/concat_and_split_functor.cu
+++ b/paddle/phi/kernels/funcs/concat_and_split_functor.cu
@@ -12,23 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <cmath>
-#include <memory>
-#include <vector>
-
-#include "paddle/fluid/framework/convert_utils.h"
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/platform/cuda_graph_with_memory_pool.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/enforce.h"
-
-#include "paddle/phi/backends/cpu/cpu_context.h"
-#include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
 
+#include "paddle/fluid/memory/malloc.h"
+#include "paddle/fluid/platform/cuda_graph_with_memory_pool.h"
+
 namespace phi {
 namespace funcs {
 
diff --git a/paddle/phi/kernels/funcs/concat_and_split_functor.h b/paddle/phi/kernels/funcs/concat_and_split_functor.h
index 3af4d878d3c..4cb15fe539b 100644
--- a/paddle/phi/kernels/funcs/concat_and_split_functor.h
+++ b/paddle/phi/kernels/funcs/concat_and_split_functor.h
@@ -13,20 +13,18 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
-#include <cmath>
-#include <memory>
+
 #include <vector>
 
-#include "paddle/fluid/framework/convert_utils.h"
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/device_context.h"
 #include "paddle/phi/core/utils/data_type.h"
 
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/memory/memcpy.h"
+
 namespace phi {
 namespace funcs {
 
diff --git a/paddle/phi/kernels/gpu/randint_kernel.cu b/paddle/phi/kernels/gpu/randint_kernel.cu
index b89b714c73d..66dc5f72a5c 100644
--- a/paddle/phi/kernels/gpu/randint_kernel.cu
+++ b/paddle/phi/kernels/gpu/randint_kernel.cu
@@ -25,7 +25,7 @@
 namespace phi {
 
 template <typename T, typename Context>
-void RandintRawKernel(const Context& ctx,
+void RandintRawKernel(const Context& dev_ctx,
                       int low,
                       int high,
                       const ScalarArray& shape,
@@ -34,21 +34,22 @@ void RandintRawKernel(const Context& ctx,
                       DenseTensor* out) {
   DenseTensor tmp;
   tmp.Resize(phi::make_ddim(shape.GetData()));
-  T* tmp_data = ctx.template HostAlloc<T>(&tmp);
+  T* tmp_data = dev_ctx.template HostAlloc<T>(&tmp);
 
-  out->ResizeAndAllocate(tmp.dims());
-  auto size = out->numel();
+  out->Resize(tmp.dims());
+  T* data = dev_ctx.template Alloc<T>(out);
 
   std::shared_ptr<std::mt19937_64> engine;
   if (seed) {
     engine = std::make_shared<std::mt19937_64>();
     engine->seed(seed);
   } else {
-    engine = ctx.GetHostGenerator()->GetCPUEngine();
+    engine = dev_ctx.GetHostGenerator()->GetCPUEngine();
   }
+
   std::uniform_int_distribution<T> dist(low, high - 1);
-  auto data = out->data<T>();
-  for (int64_t i = 0; i < size; ++i) {
+  auto numel = out->numel();
+  for (int64_t i = 0; i < numel; ++i) {
     tmp_data[i] = dist(*engine);
   }
 
@@ -57,18 +58,18 @@ void RandintRawKernel(const Context& ctx,
       data,
       tmp.place(),
       tmp_data,
-      size * paddle::experimental::SizeOf(out->dtype()),
+      numel * paddle::experimental::SizeOf(out->dtype()),
       0);
 }
 
 template <typename T, typename Context>
-void RandintKernel(const Context& ctx,
+void RandintKernel(const Context& dev_ctx,
                    int low,
                    int high,
                    const ScalarArray& shape,
                    DataType dtype,
                    DenseTensor* out) {
-  RandintRawKernel<T>(ctx, low, high, shape, dtype, 0, out);
+  RandintRawKernel<T>(dev_ctx, low, high, shape, dtype, 0, out);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/gpu/randperm_kernel.cu b/paddle/phi/kernels/gpu/randperm_kernel.cu
index f75f768b633..d4d90cac917 100644
--- a/paddle/phi/kernels/gpu/randperm_kernel.cu
+++ b/paddle/phi/kernels/gpu/randperm_kernel.cu
@@ -12,41 +12,60 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/core/dense_tensor.h"
-#include "paddle/phi/core/device_context.h"
 #include "paddle/phi/kernels/randperm_kernel.h"
 
+#include "paddle/phi/core/kernel_registry.h"
+
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/phi/core/kernel_registry.h"
 
 namespace phi {
 
 template <typename T, typename Context>
-void RandpermKernel(const Context& ctx,
-                    int n,
-                    DataType dtype,
-                    DenseTensor* out) {
+void RandpermRawKernel(
+    const Context& dev_ctx, int n, DataType dtype, int seed, DenseTensor* out) {
   DenseTensor tmp;
   tmp.Resize(phi::make_ddim({n}));
-  T* tmp_data = ctx.template HostAlloc<T>(&tmp);
+  T* tmp_data = dev_ctx.template HostAlloc<T>(&tmp);
 
-  auto gen_ptr = ctx.GetHostGenerator();
-  auto engine = gen_ptr->GetCPUEngine();
+  std::shared_ptr<std::mt19937_64> engine;
+  if (seed) {
+    engine = std::make_shared<std::mt19937_64>();
+    engine->seed(seed);
+  } else {
+    engine = dev_ctx.GetHostGenerator()->GetCPUEngine();
+  }
 
   for (int i = 0; i < n; ++i) {
     tmp_data[i] = static_cast<T>(i);
   }
   std::shuffle(tmp_data, tmp_data + n, *engine);
 
-  T* out_data = ctx.template Alloc<T>(out);
+  T* out_data = dev_ctx.template Alloc<T>(out);
   auto size = out->numel() * paddle::experimental::SizeOf(out->dtype());
   paddle::memory::Copy<phi::GPUPlace, phi::Place>(
       out->place(), out_data, tmp.place(), tmp_data, size, 0);
 }
 
+template <typename T, typename Context>
+void RandpermKernel(const Context& dev_ctx,
+                    int n,
+                    DataType dtype,
+                    DenseTensor* out) {
+  RandpermRawKernel<T>(dev_ctx, n, dtype, 0, out);
+}
+
 }  // namespace phi
 
+PD_REGISTER_KERNEL(randperm_raw,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::RandpermRawKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t) {}
+
 PD_REGISTER_KERNEL(randperm,
                    GPU,
                    ALL_LAYOUT,
diff --git a/paddle/phi/kernels/gpu/unbind_kernel.cu b/paddle/phi/kernels/gpu/unbind_kernel.cu
index 1efc3a1094d..8a7aa8f6033 100644
--- a/paddle/phi/kernels/gpu/unbind_kernel.cu
+++ b/paddle/phi/kernels/gpu/unbind_kernel.cu
@@ -12,9 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "paddle/phi/kernels/unbind_kernel.h"
+
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/unbind_kernel_impl.h"
-#include "paddle/phi/kernels/unbind_kernel.h"
 
 PD_REGISTER_KERNEL(unbind,
                    GPU,
diff --git a/paddle/phi/kernels/impl/unbind_kernel_impl.h b/paddle/phi/kernels/impl/unbind_kernel_impl.h
index 8a1342559bd..3e233a2038e 100644
--- a/paddle/phi/kernels/impl/unbind_kernel_impl.h
+++ b/paddle/phi/kernels/impl/unbind_kernel_impl.h
@@ -20,7 +20,7 @@
 namespace phi {
 
 template <typename T, typename Context>
-void UnbindKernel(const Context& ctx,
+void UnbindKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   int axis,
                   std::vector<DenseTensor*> outs) {
@@ -29,12 +29,12 @@ void UnbindKernel(const Context& ctx,
 
   std::vector<const DenseTensor*> shape_refer;
   for (size_t j = 0; j < outs.size(); ++j) {
-    ctx.template Alloc<T>(outs[j]);
+    dev_ctx.template Alloc<T>(outs[j]);
     shape_refer.emplace_back(outs[j]);
   }
 
   phi::funcs::SplitFunctor<Context, T> functor;
-  functor(ctx, x, shape_refer, axis, &outs);
+  functor(dev_ctx, x, shape_refer, axis, &outs);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/randint_kernel.h b/paddle/phi/kernels/randint_kernel.h
index 1a78e73d863..bfefc628614 100644
--- a/paddle/phi/kernels/randint_kernel.h
+++ b/paddle/phi/kernels/randint_kernel.h
@@ -20,7 +20,7 @@
 namespace phi {
 
 template <typename T, typename Context>
-void RandintKernel(const Context& ctx,
+void RandintKernel(const Context& dev_ctx,
                    int low,
                    int high,
                    const ScalarArray& shape,
@@ -28,7 +28,7 @@ void RandintKernel(const Context& ctx,
                    DenseTensor* out);
 
 template <typename T, typename Context>
-void RandintRawKernel(const Context& ctx,
+void RandintRawKernel(const Context& dev_ctx,
                       int low,
                       int high,
                       const ScalarArray& shape,
diff --git a/paddle/phi/kernels/randperm_kernel.h b/paddle/phi/kernels/randperm_kernel.h
index 63bdac6da6f..70b95db98be 100644
--- a/paddle/phi/kernels/randperm_kernel.h
+++ b/paddle/phi/kernels/randperm_kernel.h
@@ -20,7 +20,11 @@
 namespace phi {
 
 template <typename T, typename Context>
-void RandpermKernel(const Context& ctx,
+void RandpermRawKernel(
+    const Context& dev_ctx, int n, DataType dtype, int seed, DenseTensor* out);
+
+template <typename T, typename Context>
+void RandpermKernel(const Context& dev_ctx,
                     int n,
                     DataType dtype,
                     DenseTensor* out);
diff --git a/paddle/phi/ops/compat/randperm_sig.cc b/paddle/phi/ops/compat/randperm_sig.cc
index 14b28512e40..89548beff67 100644
--- a/paddle/phi/ops/compat/randperm_sig.cc
+++ b/paddle/phi/ops/compat/randperm_sig.cc
@@ -17,7 +17,12 @@
 namespace phi {
 
 KernelSignature RandpermOpArgumentMapping(const ArgumentMappingContext& ctx) {
-  return KernelSignature("randperm", {}, {"n", "dtype"}, {"Out"});
+  int seed = paddle::any_cast<int>(ctx.Attr("seed"));
+  if (seed) {
+    return KernelSignature("randperm_raw", {}, {"n", "dtype", "seed"}, {"Out"});
+  } else {
+    return KernelSignature("randperm", {}, {"n", "dtype"}, {"Out"});
+  }
 }
 
 }  // namespace phi
-- 
GitLab