Use thrust to implement uniform_random

0c37705d · Yu Yang · fd0bdb4f · 0c37705d · 0c37705d · 0c37705d
3 changed files
--- a/paddle/operators/uniform_random_op.cc
+++ b/paddle/operators/uniform_random_op.cc
@@ -49,5 +49,4 @@ Used to initialize tensor with uniform random generator.
 }  // namespace paddle

 REGISTER_OP(uniform_random, ops::RandomOp, ops::RandomOpMaker);
-REGISTER_OP_CPU_KERNEL(uniform_random,
-                       ops::UniformRandomKernel<ops::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(uniform_random, ops::CPUUniformRandomKernel<float>);
--- a/paddle/operators/uniform_random_op.cu
+++ b/paddle/operators/uniform_random_op.cu
@@ -12,7 +12,54 @@
   See the License for the specific language governing permissions and
   limitations under the License. */

-#include "paddle/operators/uniform_random_op.h"
+#include <random>
+#include <thrust/device_ptr.h>
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/random.h>
+#include <thrust/transform.h>
+#include "paddle/operators/type_alias.h"

-REGISTER_OP_GPU_KERNEL(uniform_random,
-                       ops::UniformRandomKernel<ops::GPUPlace, float>);
+namespace paddle {
+namespace operators {
+
+// Functor mapping an element index `n` to a uniformly distributed value drawn
+// from thrust::uniform_real_distribution<T>(min_, max_).
+//
+// Every call builds a fresh engine from `seed_` and skips `n` draws before
+// sampling, so the value produced for index `n` depends only on (seed_, n)
+// and not on the order in which indices are evaluated.
+template <typename T>
+struct UniformGenerator {
+  T min_, max_;
+  unsigned int seed_;
+
+  // `seed` is unsigned to match `seed_` and the value the kernel passes in,
+  // avoiding an unsigned -> int -> unsigned round trip.
+  __host__ __device__ UniformGenerator(T min, T max, unsigned int seed)
+      : min_(min), max_(max), seed_(seed) {}
+
+  // Returns the sample associated with position `n` of the seeded sequence.
+  __host__ __device__ T operator()(const unsigned int n) const {
+    thrust::minstd_rand rng(seed_);
+    thrust::uniform_real_distribution<T> dist(min_, max_);
+    // Advance past the draws that belong to the preceding indices.
+    rng.discard(n);
+    return dist(rng);
+  }
+};
+
+// Kernel that fills output 0 with uniformly distributed values using Thrust.
+//
+// Attributes read from the op:
+//   "seed" (int)   - engine seed; 0 means "pick one from std::random_device".
+//   "min"  (float) - first bound handed to the distribution.
+//   "max"  (float) - second bound handed to the distribution.
+//
+// NOTE(review): the output buffer is wrapped in thrust::device_ptr, so it is
+// presumably device memory obtained for context.GetPlace() - confirm.
+template <typename T>
+class GPUUniformRandomKernel : public OpKernel {
+ public:
+  void Compute(const ExecutionContext& context) const override {
+    auto* tensor = context.Output<Tensor>(0);
+    T* data = tensor->mutable_data<T>(context.GetPlace());
+    unsigned int seed =
+        static_cast<unsigned int>(context.op_.GetAttr<int>("seed"));
+    if (seed == 0) {
+      // A zero seed is replaced by a value from std::random_device (needs
+      // <random>), so successive runs are not expected to repeat.
+      seed = std::random_device()();
+    }
+    const T min = static_cast<T>(context.op_.GetAttr<float>("min"));
+    const T max = static_cast<T>(context.op_.GetAttr<float>("max"));
+    const ssize_t N = framework::product(tensor->dims());
+    // Element i of the output is UniformGenerator(min, max, seed)(i).
+    thrust::counting_iterator<unsigned int> index_sequence_begin(0);
+    thrust::transform(index_sequence_begin, index_sequence_begin + N,
+                      thrust::device_ptr<T>(data),
+                      UniformGenerator<T>(min, max, seed));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+REGISTER_OP_GPU_KERNEL(uniform_random, ops::GPUUniformRandomKernel<float>);
--- a/paddle/operators/uniform_random_op.h
+++ b/paddle/operators/uniform_random_op.h
@@ -13,25 +13,30 @@
   limitations under the License. */

 #pragma once
+#include <random>
+#include <type_traits>
 #include "paddle/operators/type_alias.h"
 namespace paddle {
 namespace operators {

-template <typename Place, typename T>
-class UniformRandomKernel : public OpKernel {
+// Kernel that fills output 0 with values drawn sequentially from
+// std::uniform_real_distribution<T>(min, max) using std::minstd_rand.
+//
+// Attributes read from the op:
+//   "seed" (int)   - engine seed; 0 means "pick one from std::random_device".
+//   "min"  (float) - first bound handed to the distribution.
+//   "max"  (float) - second bound handed to the distribution.
+template <typename T>
+class CPUUniformRandomKernel : public OpKernel {
 public:
-  void Compute(const ExecutionContext &context) const override {
-    auto tensor = context.Output<Tensor>(0);
-    tensor->mutable_data<T>(context.GetPlace());
-
-    auto eigenTensor = EigenVector<T>::Flatten(*tensor);
-    auto dev = context.GetEigenDevice<Place>();
-    auto min = context.op_.GetAttr<float>("min");
-    auto max = context.op_.GetAttr<float>("max");
-    auto seed = static_cast<uint64_t>(context.op_.GetAttr<int>("seed"));
-    auto diff = max - min;
-    Eigen::internal::UniformRandomGenerator<T> gen(seed);
-    eigenTensor.device(dev) = eigenTensor.random(gen) * diff + min;
+  void Compute(const ExecutionContext& context) const override {
+    auto* tensor = context.Output<Tensor>(0);
+    T* data = tensor->mutable_data<T>(context.GetPlace());
+    unsigned int seed =
+        static_cast<unsigned int>(context.op_.GetAttr<int>("seed"));
+    if (seed == 0) {
+      // A zero seed is replaced by a value from std::random_device, so
+      // successive runs are not expected to repeat.
+      seed = std::random_device()();
+    }
+    std::minstd_rand engine(seed);
+    const T min = static_cast<T>(context.op_.GetAttr<float>("min"));
+    const T max = static_cast<T>(context.op_.GetAttr<float>("max"));
+    std::uniform_real_distribution<T> dist(min, max);
+    // The element count does not change while filling, so compute it once
+    // instead of re-evaluating the product in every loop condition.
+    const ssize_t size = framework::product(tensor->dims());
+    for (ssize_t i = 0; i < size; ++i) {
+      data[i] = dist(engine);
+    }
  }
 };