Unverified commit 3b488bae authored by zhouweiwei2014, committed by GitHub

remove is_init_py of RandomGenerator, and use Global RandomGenerator by default (#42876)

* remove is_init_py of RandomGenerator, and use the global Generator when no op seed is set

* fix comment
Parent 2b4977f2
......@@ -24,7 +24,7 @@ limitations under the License. */
namespace paddle {
namespace framework {
-const std::shared_ptr<Generator>& GetDefaultCUDAGenerator(int64_t device_id) {
+const std::shared_ptr<Generator>& DefaultCUDAGenerator(int64_t device_id) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
static int64_t num_cuda_devices = -1;
......@@ -58,8 +58,6 @@ const std::shared_ptr<Generator>& GetDefaultCUDAGenerator(int64_t device_id) {
const std::shared_ptr<Generator>& DefaultCPUGenerator() {
static auto default_cpu_generator =
std::make_shared<Generator>(GetRandomSeed());
VLOG(4) << "initial seed: " << default_cpu_generator->GetCurrentSeed()
<< ", cpu engine: " << default_cpu_generator->GetCPUEngine().get();
return default_cpu_generator;
}
......@@ -100,19 +98,13 @@ const std::shared_ptr<Generator>& GetRandomSeedGenerator(
return iter->second;
}
-std::shared_ptr<std::mt19937_64> OpDefaultCPUEngine() {
-static auto op_default_cpu_engine = std::make_shared<std::mt19937_64>();
-return op_default_cpu_engine;
-}
-// NOTE(zhiqiu): there are 3 conditions:
-// (1) op seed is not set and DefaultCPUGenerator is inited, use
-// DefaultCPUGenerator
-// (2) op seed is not set and DefaultCPUGenerator is not inited, use se
-// OpDefaultCPUEngine() and set a radnom seed
-// (3) op seed is set, use OpDefaultCPUEngine() and set the seed
+// There are 3 conditions:
+// (1) op seed is set, use op seed.
+// (2) op seed is not set, global seed is set, use global seed.
+// (3) op seed is not set, global seed is not set too, use random seed from
+// RandomGenerator.
std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t seed) {
-if (DefaultCPUGenerator()->GetIsInitPy() && seed == 0) {
+if (seed == 0) {
VLOG(4) << "Use random engine from generator";
return DefaultCPUGenerator()->GetCPUEngine();
} else {
......@@ -123,12 +115,6 @@ std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t seed) {
//
// And we need to measure the determinacy of Generator in PE.
auto engine = std::make_shared<std::mt19937_64>();
-if (seed == 0) {
-seed = GetRandomSeed();
-VLOG(4) << "Use default random engine with random seed = " << seed;
-} else {
-VLOG(4) << "Use default random engine with fixed random seed = " << seed;
-}
static std::mutex mu_;
{
std::lock_guard<std::mutex> lock(mu_);
......@@ -204,11 +190,5 @@ std::pair<uint64_t, uint64_t> Generator::IncrementOffset(
#endif
}
-void Generator::SetIsInitPy(bool is_init_py) {
-this->is_init_py_ = is_init_py;
-VLOG(4) << "SetIsInitPy:" << this->is_init_py_;
-}
-bool Generator::GetIsInitPy() const { return this->is_init_py_; }
} // namespace framework
} // namespace paddle
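The control flow replacing the is_init_py check deserves spelling out: an explicit op seed gets a dedicated engine, anything else falls through to the shared global engine. A minimal standalone sketch of that policy in plain C++ (illustrative names, not Paddle's actual sources):

    #include <cstdint>
    #include <memory>
    #include <random>

    // Stand-in for DefaultCPUGenerator()'s engine: one process-wide engine,
    // randomly seeded once, shared by every op that sets no seed of its own.
    std::shared_ptr<std::mt19937_64> DefaultEngine() {
      static auto engine =
          std::make_shared<std::mt19937_64>(std::random_device{}());
      return engine;
    }

    std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t seed) {
      if (seed == 0) {
        // Conditions (2)/(3): no op seed, follow the global generator.
        return DefaultEngine();
      }
      // Condition (1): op seed is set, use a dedicated fixed-seed engine.
      auto engine = std::make_shared<std::mt19937_64>();
      engine->seed(seed);
      return engine;
    }

    int main() {
      // seed == 0 always returns the same shared engine object.
      bool shared = GetCPURandomEngine(0).get() == GetCPURandomEngine(0).get();
      // A fixed op seed yields a reproducible stream.
      bool reproducible =
          (*GetCPURandomEngine(42))() == (*GetCPURandomEngine(42))();
      return (shared && reproducible) ? 0 : 1;
    }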
......@@ -59,7 +59,6 @@ struct Generator : public phi::Generator {
this->engine_ = engine;
VLOG(4) << "initial seed: " << this->state_.current_seed
<< ", cpu engine: " << &this->state_.cpu_engine;
-this->is_init_py_ = true; // TODO(zhiqiu): remove it in future
}
Generator(uint64_t seed, uint64_t device_id) {
std::seed_seq seq({seed});
......@@ -71,7 +70,6 @@ struct Generator : public phi::Generator {
this->engine_ = engine;
VLOG(4) << "initial seed: " << this->state_.current_seed
<< ", cpu engine: " << &this->state_.cpu_engine;
-this->is_init_py_ = false; // TODO(zhiqiu): remove it in future
}
Generator(const Generator& other) = delete;
......@@ -95,32 +93,21 @@ struct Generator : public phi::Generator {
std::pair<uint64_t, uint64_t> IncrementOffset(uint64_t increament_offset);
-void SetIsInitPy(bool);
-bool GetIsInitPy() const;
uint64_t get_device_id() { return this->state_.device; }
private:
phi::Generator::GeneratorState state_;
std::shared_ptr<std::mt19937_64> engine_;
mutable std::mutex mu_;
-// NOTE(zhiqiu): is_init_py_ is used to make generator be compatible with
-// old seed, and it should be removed after all random-related operators
-// and unittests upgrades to use generator.
-bool is_init_py_ = false;
};
// The DefaultCPUGenerator is used in manual_seed()
const std::shared_ptr<Generator>& DefaultCPUGenerator();
-// If op seed is set or global is not set, the OpDefaultCPUEngine is used.
-std::shared_ptr<std::mt19937_64> OpDefaultCPUEngine();
+const std::shared_ptr<Generator>& DefaultCUDAGenerator(int64_t device_id = -1);
std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t);
-const std::shared_ptr<Generator>& GetDefaultCUDAGenerator(
-int64_t device_id = -1);
const std::shared_ptr<Generator>& SetRandomSeedGenerator(
const std::string& name, uint64_t seed);
......
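On the CUDA side the generator never hands out an engine object; kernels use counter-based RNG, so IncrementOffset only has to reserve a disjoint (seed, offset) range per launch. A sketch of that bookkeeping, assuming the returned pair is (current seed, offset before the increment), as the call sites below suggest:

    #include <cstdint>
    #include <mutex>
    #include <utility>

    // Illustrative host-side state for a counter-based (Philox-style) RNG.
    class CounterGenerator {
     public:
      explicit CounterGenerator(uint64_t seed) : seed_(seed) {}

      // Reserve `increment` counter steps. Each launch seeds its kernel with
      // the returned pair, so concurrent launches never reuse a counter.
      std::pair<uint64_t, uint64_t> IncrementOffset(uint64_t increment) {
        std::lock_guard<std::mutex> lock(mu_);
        uint64_t cur = offset_;
        offset_ += increment;
        return {seed_, cur};
      }

     private:
      uint64_t seed_;
      uint64_t offset_ = 0;
      std::mutex mu_;
    };

Call sites then scale the returned offset by the element count (for example `size * offset` in the kernels below) so successive launches read disjoint slices of the random stream.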
......@@ -416,14 +416,13 @@ class ClassCenterSampleCUDAKernel : public framework::OpKernel<T> {
1) *
vec_size;
int device_id = ctx.GetPlace().GetDeviceId();
-auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
-if (gen_cuda->GetIsInitPy() && (!fix_seed)) {
+auto gen_cuda = framework::DefaultCUDAGenerator(device_id);
+if (!fix_seed) {
auto seed_offset = gen_cuda->IncrementOffset(offset);
seed_data = seed_offset.first;
increment = seed_offset.second;
} else {
-std::random_device rnd;
-seed_data = fix_seed ? seed + rank : rnd();
+seed_data = seed + rank;
increment = offset;
}
RandomSampleClassCenter<T><<<NumBlocks(num_classes), kNumCUDAThreads, 0,
......
......@@ -172,17 +172,13 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
int seed = ctx.Attr<int>("seed");
if (!is_test) {
-int device_id = ctx.GetPlace().GetDeviceId();
-auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
-if (gen_cuda->GetIsInitPy() && seed == 0) {
-// If perform `manual_seed` in python and inner seed is not specified
-// (equals 0), use global generator generated seed.
+if (seed == 0) {
+// If not specify seed, use global Generator to generate seed.
+int device_id = ctx.GetPlace().GetDeviceId();
+auto gen_cuda = paddle::framework::DefaultCUDAGenerator(device_id);
seed = static_cast<int>(gen_cuda->Random64());
-} else if (seed == 0) {
-// use random generated seed
-std::random_device rd;
-seed = rd();
-} // else use `ctx.Attr<int>("seed")` specified seed
+}
+// else use `ctx.Attr<int>("seed")` specified seed
}
bool has_seq_length = ctx.HasInput("SequenceLength");
......
......@@ -77,7 +77,7 @@ struct DirichletSampler<platform::CUDADeviceContext, T> {
// init state, seed & offset for all threads
int device_id = ctx.GetPlace().GetDeviceId();
-auto p_gen = framework::GetDefaultCUDAGenerator(device_id);
+auto p_gen = framework::DefaultCUDAGenerator(device_id);
auto seed_and_offset = p_gen->IncrementOffset(10); // hard-coded offset
auto seed = seed_and_offset.first;
auto offset = seed_and_offset.second;
......
......@@ -26,7 +26,7 @@ inline void GetSeedDataAndIncrement(const phi::GPUContext& dev_ctx,
const int offset, uint64_t* seed_data,
uint64_t* increment) {
int device_id = dev_ctx.GetPlace().GetDeviceId();
-auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
+auto gen_cuda = framework::DefaultCUDAGenerator(device_id);
if (seed) {
framework::Tensor seed_cpu_tensor;
......@@ -34,13 +34,12 @@ inline void GetSeedDataAndIncrement(const phi::GPUContext& dev_ctx,
&seed_cpu_tensor);
*seed_data = static_cast<uint64_t>(seed_cpu_tensor.data<int>()[0]);
*increment = offset;
-} else if (gen_cuda->GetIsInitPy() && (!is_fix_seed)) {
+} else if (!is_fix_seed) {
auto seed_offset = gen_cuda->IncrementOffset(offset);
*seed_data = seed_offset.first;
*increment = seed_offset.second;
} else {
-std::random_device rnd;
-*seed_data = is_fix_seed ? seed_val : rnd();
+*seed_data = seed_val;
*increment = offset;
}
}
......
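GetSeedDataAndIncrement now encodes a clean three-way priority: a seed tensor fed in at runtime wins, otherwise a non-fixed seed comes from the global generator, and only a fixed seed attribute bypasses both. A compact mirror of that logic with hypothetical types (the real helper takes a phi::GPUContext and copies the seed tensor from device memory):

    #include <cstdint>
    #include <optional>
    #include <utility>

    // Stand-in for DefaultCUDAGenerator(device_id).
    struct GlobalGen {
      uint64_t seed = 0;
      uint64_t offset = 0;
      std::pair<uint64_t, uint64_t> IncrementOffset(uint64_t n) {
        uint64_t cur = offset;
        offset += n;
        return {seed, cur};
      }
    };

    void GetSeedDataAndIncrement(GlobalGen& gen,
                                 const std::optional<uint64_t>& seed_tensor,
                                 bool is_fix_seed, uint64_t seed_val,
                                 uint64_t offset, uint64_t* seed_data,
                                 uint64_t* increment) {
      if (seed_tensor) {          // 1) runtime seed input wins
        *seed_data = *seed_tensor;
        *increment = offset;
      } else if (!is_fix_seed) {  // 2) otherwise the global generator
        auto so = gen.IncrementOffset(offset);
        *seed_data = so.first;
        *increment = so.second;
      } else {                    // 3) fixed `seed` attribute
        *seed_data = seed_val;
        *increment = offset;
      }
    }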
......@@ -54,26 +54,21 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
auto* tensor = context.Output<framework::Tensor>("Out");
T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
-bool seed_flag = false;
-if (seed == 0) {
-std::random_device rd;
-seed = rd();
-seed_flag = true;
-}
T mean = static_cast<T>(context.Attr<float>("mean"));
T std = static_cast<T>(context.Attr<float>("std"));
int64_t size = tensor->numel();
int device_id = context.GetPlace().GetDeviceId();
-auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
+auto gen_cuda = framework::DefaultCUDAGenerator(device_id);
auto& dev_cxt =
context.template device_context<platform::CUDADeviceContext>();
-if (gen_cuda->GetIsInitPy() && seed_flag) {
+if (seed == 0) {
+// use global Generator seed
auto seed_offset = gen_cuda->IncrementOffset(1);
-int64_t gen_offset = size * seed_offset.second;
-auto func = GaussianGenerator<T>(mean, std, seed_offset.first,
-seed_offset.second);
+uint64_t seed = seed_offset.first;
+uint64_t offset = seed_offset.second;
+auto func = GaussianGenerator<T>(mean, std, seed, size * offset);
phi::IndexKernel<T, GaussianGenerator<T>>(dev_cxt, tensor, func);
} else {
auto func = GaussianGenerator<T>(mean, std, seed);
......
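GaussianGenerator here is the index-based pattern: element i is computed purely from (seed, base_offset + i), so GPU threads share no state and regenerating with the same pair reproduces the tensor exactly. A CPU sketch of the same idea (the device functor uses a thrust engine plus discard; mt19937_64 stands in here):

    #include <cstdint>
    #include <random>
    #include <vector>

    // Element i depends only on (seed, base_offset + i): any thread can
    // compute any element in any order.
    template <typename T>
    struct GaussianGenerator {
      T mean, stddev;
      uint64_t seed, base_offset;

      T operator()(int64_t idx) const {
        std::mt19937_64 engine(seed);
        engine.discard(base_offset + static_cast<uint64_t>(idx));
        std::normal_distribution<T> dist(mean, stddev);
        return dist(engine);
      }
    };

    int main() {
      GaussianGenerator<double> gen{0.0, 1.0, /*seed=*/7, /*base_offset=*/0};
      std::vector<double> out(8);
      for (int64_t i = 0; i < static_cast<int64_t>(out.size()); ++i) {
        out[i] = gen(i);  // order-independent, trivially parallel
      }
      return 0;
    }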
......@@ -151,12 +151,6 @@ void UniformRandom(const framework::ExecutionContext& context,
T* data = tensor->mutable_data<T>(dev_cxt.GetPlace());
if (size <= 0) return;
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
-bool seed_flag = false;
-if (seed == 0) {
-std::random_device rd;
-seed = rd();
-seed_flag = true;
-}
T min = static_cast<T>(context.Attr<float>("min"));
T max = static_cast<T>(context.Attr<float>("max"));
......@@ -165,14 +159,15 @@ void UniformRandom(const framework::ExecutionContext& context,
unsigned int diag_step =
static_cast<unsigned int>(context.Attr<int>("diag_step"));
T diag_val = static_cast<T>(context.Attr<float>("diag_val"));
-int device_id = context.GetPlace().GetDeviceId();
-auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
-if (gen_cuda->GetIsInitPy() && seed_flag) {
+if (seed == 0) {
+// Use global Generator seed
using MT = typename details::MPTypeTrait<T>::Type;
phi::funcs::uniform_distribution<MT> dist;
phi::funcs::uniform_real_transform<MT> trans(min, max);
phi::funcs::distribution_and_transform<T>(dev_cxt, tensor, dist, trans);
} else {
+// Use OP seed
auto func =
UniformGenerator<T>(min, max, seed, diag_num, diag_step, diag_val);
phi::IndexKernel<T, UniformGenerator<T>>(dev_cxt, tensor, func);
......
......@@ -169,7 +169,7 @@ inline void EmplaceDeviceContext(
cuda_ctx->PartialInitWithAllocator();
dev_ctx->SetGenerator(
-framework::GetDefaultCUDAGenerator(p.GetDeviceId()).get());
+framework::DefaultCUDAGenerator(p.GetDeviceId()).get());
#endif
} else {
dev_ctx->SetAllocator(memory::allocation::AllocatorFacade::Instance()
......
......@@ -55,13 +55,9 @@ void BindGenerator(py::module* m_ptr) {
})
.def("seed", &framework::Generator::Seed)
.def("initial_seed", &framework::Generator::GetCurrentSeed)
.def("random", &framework::Generator::Random64)
// .def("get_cpu_engine", &framework::Generator::GetCPUEngine)
// .def("set_cpu_engine", &framework::Generator::SetCPUEngine)
.def_property("_is_init_py", &framework::Generator::GetIsInitPy,
&framework::Generator::SetIsInitPy);
.def("random", &framework::Generator::Random64);
m.def("default_cpu_generator", &framework::DefaultCPUGenerator);
m.def("default_cuda_generator", &framework::GetDefaultCUDAGenerator);
m.def("default_cuda_generator", &framework::DefaultCUDAGenerator);
m.def("set_random_seed_generator", &framework::SetRandomSeedGenerator);
m.def("get_random_seed_generator", &framework::GetRandomSeedGenerator);
}
......
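After this change the Python surface is just seed/initial_seed/random; the `_is_init_py` property disappears because C++ no longer needs Python to flag that seeding happened. A minimal pybind11 sketch of such a reduced binding (illustrative class and module name, not phi's Generator):

    #include <pybind11/pybind11.h>

    #include <cstdint>
    #include <memory>
    #include <random>

    namespace py = pybind11;

    // Illustrative generator: just enough surface for the binding.
    struct Generator {
      std::mt19937_64 engine;
      uint64_t seed = 0;
      uint64_t ManualSeed(uint64_t s) {
        seed = s;
        engine.seed(s);
        return s;
      }
      uint64_t InitialSeed() const { return seed; }
      uint64_t Random64() { return engine(); }
    };

    PYBIND11_MODULE(gen_demo, m) {
      py::class_<Generator, std::shared_ptr<Generator>>(m, "Generator")
          .def(py::init<>())
          .def("manual_seed", &Generator::ManualSeed)
          .def("initial_seed", &Generator::InitialSeed)
          .def("random", &Generator::Random64);
      // No "_is_init_py" property: the C++ side no longer tracks whether
      // Python performed the seeding.
    }

From Python this would be used as `g = gen_demo.Generator(); g.manual_seed(42); g.random()`.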
......@@ -49,12 +49,6 @@ class Generator {
virtual std::pair<uint64_t, uint64_t> IncrementOffset(
uint64_t increament_offset) = 0;
-// NOTE(zhiqiu): is_init_py_ is used to make generator be compatible with
-// old seed, and it should be removed after all random-related operators
-// and unittests upgrades to use generator.
-virtual void SetIsInitPy(bool) = 0;
-virtual bool GetIsInitPy() const = 0;
virtual uint64_t get_device_id() = 0;
};
......
......@@ -59,34 +59,20 @@ void GaussianRandomKernel(const Context& dev_ctx,
int seed,
DataType dtype,
DenseTensor* out) {
-auto tensor = out;
-bool seed_flag = false;
+out->Resize(phi::make_ddim(shape.GetData()));
+dev_ctx.template Alloc<T>(out);
if (seed == 0) {
-std::random_device rd;
-seed = rd();
-seed_flag = true;
-}
-tensor->Resize(phi::make_ddim(shape.GetData()));
-T* data = dev_ctx.template Alloc<T>(tensor);
-int64_t size = tensor->numel();
-int device_id = dev_ctx.GetPlace().GetDeviceId();
-auto gen_cuda = paddle::framework::GetDefaultCUDAGenerator(device_id);
-if (gen_cuda->GetIsInitPy() && seed_flag) {
// use global Generator seed
using MT = typename phi::dtype::MPTypeTrait<T>::Type;
funcs::normal_distribution<MT> dist;
funcs::normal_transform<MT> trans(static_cast<MT>(mean),
static_cast<MT>(std));
-funcs::distribution_and_transform<T>(dev_ctx, tensor, dist, trans);
+funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);
} else {
// use OP seed
auto func =
GaussianGenerator<T>(static_cast<T>(mean), static_cast<T>(std), seed);
-IndexKernel<T, GaussianGenerator<T>>(dev_ctx, tensor, func);
+IndexKernel<T, GaussianGenerator<T>>(dev_ctx, out, func);
}
}
......
......@@ -27,12 +27,9 @@
namespace cub = hipcub;
#endif
#include <thrust/device_vector.h>
-#include <thrust/host_vector.h>
#include <thrust/random.h>
#include <thrust/transform.h>
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace phi {
......@@ -144,27 +141,21 @@ struct GumbleNoiseGenerator<GPUContext, T> {
DenseTensor random_tensor;
int64_t size = size_to_axis * size_from_axis;
random_tensor.Resize(make_ddim({size}));
-auto* random_data = ctx.template Alloc<T>(&random_tensor);
-thrust::counting_iterator<int64_t> index_sequence_begin(0);
+T* random_data = ctx.template Alloc<T>(&random_tensor);
// generate gumbel noise
-int device_id = ctx.GetPlace().GetDeviceId();
-auto gen_cuda = paddle::framework::GetDefaultCUDAGenerator(device_id);
-if (gen_cuda->GetIsInitPy()) {
-auto seed_offset = gen_cuda->IncrementOffset(1);
-int64_t gen_offset = size * seed_offset.second;
-thrust::transform(
-index_sequence_begin,
-index_sequence_begin + size,
-thrust::device_ptr<T>(random_data),
-UniformCUDAGenerator<T>(0.00001, 1, seed_offset.first, gen_offset));
-} else {
-const unsigned int seed = std::random_device()();
-thrust::transform(index_sequence_begin,
-index_sequence_begin + size,
-thrust::device_ptr<T>(random_data),
-UniformCUDAGenerator<T>(0.00001, 1, seed));
-}
+auto gen_cuda = ctx.GetGenerator();
+auto seed_offset = gen_cuda->IncrementOffset(1);
+uint64_t seed = seed_offset.first;
+uint64_t offset = seed_offset.second;
+thrust::counting_iterator<int64_t> index_sequence_begin(0);
+thrust::transform(index_sequence_begin,
+index_sequence_begin + size,
+thrust::device_ptr<T>(random_data),
+UniformCUDAGenerator<T>(0.00001, 1, seed, size * offset));
// add gumbel noise to X
const int thread_size = 512;
......
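The kernel draws uniforms on [0.00001, 1) rather than [0, 1) because the "add gumbel noise" step that follows takes a double log, and u = 0 would produce infinities. A CPU sketch of the standard inverse-CDF Gumbel sampling, assuming that is the transform applied downstream:

    #include <cmath>
    #include <random>
    #include <vector>

    int main() {
      std::mt19937_64 engine(2022);
      // The strictly positive lower bound keeps log(u) finite.
      std::uniform_real_distribution<double> uniform(0.00001, 1.0);
      std::vector<double> noise(16);
      for (double& g : noise) {
        double u = uniform(engine);
        g = -std::log(-std::log(u));  // inverse-CDF sample of Gumbel(0, 1)
      }
      return 0;
    }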
......@@ -175,17 +175,13 @@ void RnnKernel(const Context &dev_ctx,
mode));
if (!is_test) {
-int device_id = dev_ctx.GetPlace().GetDeviceId();
-auto gen_cuda = paddle::framework::GetDefaultCUDAGenerator(device_id);
-if (gen_cuda->GetIsInitPy() && seed == 0) {
-// If perform `manual_seed` in python and inner seed is not specified
-// (equals 0), use global generator generated seed.
+if (seed == 0) {
+// If not specify seed, use global Generator to generate seed.
+int device_id = dev_ctx.GetPlace().GetDeviceId();
+auto gen_cuda = paddle::framework::DefaultCUDAGenerator(device_id);
seed = static_cast<int>(gen_cuda->Random64());
-} else if (seed == 0) {
-// use random generated seed
-std::random_device rd;
-seed = rd();
-} // else use `ctx.Attr<int>("seed")` specified seed
+}
+// else use `ctx.Attr<int>("seed")` specified seed
}
const T *x_data = x.data<T>();
......
......@@ -90,34 +90,25 @@ void TruncatedGaussianRandomKernel(const Context& dev_ctx,
int seed,
DataType dtype,
DenseTensor* out) {
-auto tensor = out;
-T* data = dev_ctx.template Alloc<T>(tensor);
-bool seed_flag = false;
-if (seed == 0) {
-std::random_device rd;
-seed = rd();
-seed_flag = true;
-}
+T* data = dev_ctx.template Alloc<T>(out);
thrust::counting_iterator<int64_t> index_sequence_begin(0);
-int64_t size = tensor->numel();
+int64_t size = out->numel();
auto gen_cuda = dev_ctx.GetGenerator();
-if (gen_cuda->GetIsInitPy() && seed_flag) {
+if (seed == 0) {
+// use global Generator seed
auto seed_offset = gen_cuda->IncrementOffset(1);
-int64_t gen_offset = size * seed_offset.second;
-thrust::transform(index_sequence_begin,
-index_sequence_begin + size,
-thrust::device_ptr<T>(data),
-TruncatedNormalOffset<T>(mean,
-std,
-std::numeric_limits<T>::min(),
-seed_offset.first,
-gen_offset));
+uint64_t seed = seed_offset.first;
+uint64_t offset = seed_offset.second;
+thrust::transform(
+index_sequence_begin,
+index_sequence_begin + size,
+thrust::device_ptr<T>(data),
+TruncatedNormalOffset<T>(
+mean, std, std::numeric_limits<T>::min(), seed, size * offset));
} else {
+// use OP seed
thrust::transform(
index_sequence_begin,
index_sequence_begin + size,
......
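TruncatedNormalOffset maps each uniform through a truncated inverse normal CDF. For intuition, rejection sampling produces the same distribution, assuming the usual two-standard-deviation truncation of truncated_gaussian_random; this is an illustrative equivalent, not the kernel's method:

    #include <random>

    // Resample until the draw lands within two standard deviations.
    double TruncatedNormal(std::mt19937_64& engine, double mean,
                           double stddev) {
      std::normal_distribution<double> dist(mean, stddev);
      for (;;) {
        double x = dist(engine);
        if (x > mean - 2.0 * stddev && x < mean + 2.0 * stddev) {
          return x;
        }
      }
    }

    int main() {
      std::mt19937_64 engine(42);
      double sample = TruncatedNormal(engine, 0.0, 1.0);
      return (sample > -2.0 && sample < 2.0) ? 0 : 1;  // always 0
    }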
......@@ -65,22 +65,15 @@ void UniformRandomRawKernel(const Context& dev_ctx,
float diag_val,
DenseTensor* out) {
out->Resize(phi::make_ddim(shape.GetData()));
-T* data = dev_ctx.template Alloc<T>(out);
-auto size = out->numel();
-bool seed_flag = false;
+dev_ctx.template Alloc<T>(out);
if (seed == 0) {
-std::random_device rd;
-seed = rd();
-seed_flag = true;
-}
-auto generator = dev_ctx.GetGenerator();
-if (generator->GetIsInitPy() && seed_flag) {
// Use global Generator seed
using MT = typename kps::details::MPTypeTrait<T>::Type;
funcs::uniform_distribution<MT> dist;
funcs::uniform_real_transform<MT> trans(min, max);
funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);
} else {
+// Use OP seed
auto func =
UniformGenerator<T>(min, max, seed, diag_num, diag_step, diag_val);
IndexKernel<T, UniformGenerator<T>>(dev_ctx, out, func);
......
......@@ -25,6 +25,8 @@ import paddle
import paddle.fluid.core as core
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+"Only test cuda Random Generator")
class TestGeneratorSeed(unittest.TestCase):
"""
Test cases for cpu generator seed.
......@@ -70,15 +72,13 @@ class TestGeneratorSeed(unittest.TestCase):
"""Test Generator seed."""
fluid.enable_dygraph()
paddle.seed(12312321111)
-x = fluid.layers.gaussian_random([120], dtype="float32")
-st1 = paddle.get_cuda_rng_state()
-x1 = fluid.layers.gaussian_random([120], dtype="float32")
-paddle.set_cuda_rng_state(st1)
-x2 = fluid.layers.gaussian_random([120], dtype="float32")
-paddle.seed(12312321111)
-x3 = fluid.layers.gaussian_random([120], dtype="float32")
-x_np = x.numpy()
+st = paddle.get_cuda_rng_state()
+x1 = paddle.randn([120], dtype="float32")
+paddle.set_cuda_rng_state(st)
+x2 = paddle.randn([120], dtype="float32")
+paddle.set_cuda_rng_state(st)
+x3 = paddle.randn([120], dtype="float32")
x1_np = x1.numpy()
x2_np = x2.numpy()
x3_np = x3.numpy()
......@@ -86,7 +86,7 @@ class TestGeneratorSeed(unittest.TestCase):
if core.is_compiled_with_cuda():
print(">>>>>>> gaussian random dygraph >>>>>>>")
self.assertTrue(np.allclose(x1_np, x2_np))
-self.assertTrue(np.allclose(x_np, x3_np))
+self.assertTrue(np.allclose(x2_np, x3_np))
def test_generator_randint_dygraph(self):
"""Test Generator seed."""
......
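The rewritten test pins down the contract the commit relies on: capturing generator state and restoring it must replay the identical stream, with no hidden `_is_init_py` flag involved. The same contract in miniature for a CPU engine, using iostream state serialization in place of get/set_cuda_rng_state:

    #include <cstdint>
    #include <random>
    #include <sstream>
    #include <string>

    int main() {
      std::mt19937_64 engine(12312321111ULL);

      std::ostringstream saved;
      saved << engine;                      // like st = get_cuda_rng_state()
      const std::string state = saved.str();

      uint64_t x1 = engine();

      std::istringstream(state) >> engine;  // like set_cuda_rng_state(st)
      uint64_t x2 = engine();

      std::istringstream(state) >> engine;
      uint64_t x3 = engine();

      // Identical state yields identical draws.
      return (x1 == x2 && x2 == x3) ? 0 : 1;
    }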
......@@ -629,7 +629,6 @@ class ModuleApiTest(unittest.TestCase):
else:
fluid.disable_dygraph()
gen = paddle.seed(self._random_seed)
-gen._is_init_py = False
paddle.framework.random._manual_program_seed(self._random_seed)
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
......
......@@ -178,7 +178,6 @@ class TestUniformRandomOpAPISeed(unittest.TestCase):
def test_attr_tensor_API(self):
_seed = 10
gen = paddle.seed(_seed)
-gen._is_init_py = False
startup_program = fluid.Program()
train_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
......
......@@ -370,7 +370,6 @@ class TestUniformRandomOp_API_seed(unittest.TestCase):
def test_attr_tensor_API(self):
_seed = 10
gen = paddle.seed(_seed)
-gen._is_init_py = False
startup_program = fluid.Program()
train_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
......
......@@ -44,10 +44,8 @@ def seed(seed):
if core.is_compiled_with_cuda():
for i in range(core.get_cuda_device_count()):
-core.default_cuda_generator(i)._is_init_py = True
core.default_cuda_generator(i).manual_seed(seed)
-core.default_cpu_generator()._is_init_py = True
return core.default_cpu_generator().manual_seed(seed)
......