未验证 提交 3b488bae 编写于 作者: zhouweiwei2014's avatar zhouweiwei2014 提交者: GitHub

remove is_init_py of RandomGenerator, and use Global RandomGenerator by default (#42876)

* remove is_init_py of RandomGenerator, and use the global Generator when no OP seed is set

* fix comment
上级 2b4977f2
...@@ -24,7 +24,7 @@ limitations under the License. */ ...@@ -24,7 +24,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
const std::shared_ptr<Generator>& GetDefaultCUDAGenerator(int64_t device_id) { const std::shared_ptr<Generator>& DefaultCUDAGenerator(int64_t device_id) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
static int64_t num_cuda_devices = -1; static int64_t num_cuda_devices = -1;
...@@ -58,8 +58,6 @@ const std::shared_ptr<Generator>& GetDefaultCUDAGenerator(int64_t device_id) { ...@@ -58,8 +58,6 @@ const std::shared_ptr<Generator>& GetDefaultCUDAGenerator(int64_t device_id) {
const std::shared_ptr<Generator>& DefaultCPUGenerator() { const std::shared_ptr<Generator>& DefaultCPUGenerator() {
static auto default_cpu_generator = static auto default_cpu_generator =
std::make_shared<Generator>(GetRandomSeed()); std::make_shared<Generator>(GetRandomSeed());
VLOG(4) << "initial seed: " << default_cpu_generator->GetCurrentSeed()
<< ", cpu engine: " << default_cpu_generator->GetCPUEngine().get();
return default_cpu_generator; return default_cpu_generator;
} }
...@@ -100,19 +98,13 @@ const std::shared_ptr<Generator>& GetRandomSeedGenerator( ...@@ -100,19 +98,13 @@ const std::shared_ptr<Generator>& GetRandomSeedGenerator(
return iter->second; return iter->second;
} }
std::shared_ptr<std::mt19937_64> OpDefaultCPUEngine() { // There are 3 conditions:
static auto op_default_cpu_engine = std::make_shared<std::mt19937_64>(); // (1) op seed is set, use op seed.
return op_default_cpu_engine; // (2) op seed is not set, global seed is set, use global seed.
} // (3) op seed is not set, global seed is not set too, use random seed from
// RandomGenerator.
// NOTE(zhiqiu): there are 3 conditions:
// (1) op seed is not set and DefaultCPUGenerator is inited, use
// DefaultCPUGenerator
// (2) op seed is not set and DefaultCPUGenerator is not inited, use
// OpDefaultCPUEngine() and set a random seed
// (3) op seed is set, use OpDefaultCPUEngine() and set the seed
std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t seed) { std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t seed) {
if (DefaultCPUGenerator()->GetIsInitPy() && seed == 0) { if (seed == 0) {
VLOG(4) << "Use random engine from generator"; VLOG(4) << "Use random engine from generator";
return DefaultCPUGenerator()->GetCPUEngine(); return DefaultCPUGenerator()->GetCPUEngine();
} else { } else {
...@@ -123,12 +115,6 @@ std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t seed) { ...@@ -123,12 +115,6 @@ std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t seed) {
// //
// And we need to measure the determinacy of Generator in PE. // And we need to measure the determinacy of Generator in PE.
auto engine = std::make_shared<std::mt19937_64>(); auto engine = std::make_shared<std::mt19937_64>();
if (seed == 0) {
seed = GetRandomSeed();
VLOG(4) << "Use default random engine with random seed = " << seed;
} else {
VLOG(4) << "Use default random engine with fixed random seed = " << seed;
}
static std::mutex mu_; static std::mutex mu_;
{ {
std::lock_guard<std::mutex> lock(mu_); std::lock_guard<std::mutex> lock(mu_);
...@@ -204,11 +190,5 @@ std::pair<uint64_t, uint64_t> Generator::IncrementOffset( ...@@ -204,11 +190,5 @@ std::pair<uint64_t, uint64_t> Generator::IncrementOffset(
#endif #endif
} }
void Generator::SetIsInitPy(bool is_init_py) {
this->is_init_py_ = is_init_py;
VLOG(4) << "SetIsInitPy:" << this->is_init_py_;
}
bool Generator::GetIsInitPy() const { return this->is_init_py_; }
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -59,7 +59,6 @@ struct Generator : public phi::Generator { ...@@ -59,7 +59,6 @@ struct Generator : public phi::Generator {
this->engine_ = engine; this->engine_ = engine;
VLOG(4) << "initial seed: " << this->state_.current_seed VLOG(4) << "initial seed: " << this->state_.current_seed
<< ", cpu engine: " << &this->state_.cpu_engine; << ", cpu engine: " << &this->state_.cpu_engine;
this->is_init_py_ = true; // TODO(zhiqiu): remove it in future
} }
Generator(uint64_t seed, uint64_t device_id) { Generator(uint64_t seed, uint64_t device_id) {
std::seed_seq seq({seed}); std::seed_seq seq({seed});
...@@ -71,7 +70,6 @@ struct Generator : public phi::Generator { ...@@ -71,7 +70,6 @@ struct Generator : public phi::Generator {
this->engine_ = engine; this->engine_ = engine;
VLOG(4) << "initial seed: " << this->state_.current_seed VLOG(4) << "initial seed: " << this->state_.current_seed
<< ", cpu engine: " << &this->state_.cpu_engine; << ", cpu engine: " << &this->state_.cpu_engine;
this->is_init_py_ = false; // TODO(zhiqiu): remove it in future
} }
Generator(const Generator& other) = delete; Generator(const Generator& other) = delete;
...@@ -95,32 +93,21 @@ struct Generator : public phi::Generator { ...@@ -95,32 +93,21 @@ struct Generator : public phi::Generator {
std::pair<uint64_t, uint64_t> IncrementOffset(uint64_t increament_offset); std::pair<uint64_t, uint64_t> IncrementOffset(uint64_t increament_offset);
void SetIsInitPy(bool);
bool GetIsInitPy() const;
uint64_t get_device_id() { return this->state_.device; } uint64_t get_device_id() { return this->state_.device; }
private: private:
phi::Generator::GeneratorState state_; phi::Generator::GeneratorState state_;
std::shared_ptr<std::mt19937_64> engine_; std::shared_ptr<std::mt19937_64> engine_;
mutable std::mutex mu_; mutable std::mutex mu_;
// NOTE(zhiqiu): is_init_py_ is used to make generator be compatible with
// old seed, and it should be removed after all random-related operators
// and unittests upgrades to use generator.
bool is_init_py_ = false;
}; };
// The DefaultCPUGenerator is used in manual_seed() // The DefaultCPUGenerator is used in manual_seed()
const std::shared_ptr<Generator>& DefaultCPUGenerator(); const std::shared_ptr<Generator>& DefaultCPUGenerator();
// If op seed is set or global is not set, the OpDefaultCPUEngine is used. const std::shared_ptr<Generator>& DefaultCUDAGenerator(int64_t device_id = -1);
std::shared_ptr<std::mt19937_64> OpDefaultCPUEngine();
std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t); std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t);
const std::shared_ptr<Generator>& GetDefaultCUDAGenerator(
int64_t device_id = -1);
const std::shared_ptr<Generator>& SetRandomSeedGenerator( const std::shared_ptr<Generator>& SetRandomSeedGenerator(
const std::string& name, uint64_t seed); const std::string& name, uint64_t seed);
......
...@@ -416,14 +416,13 @@ class ClassCenterSampleCUDAKernel : public framework::OpKernel<T> { ...@@ -416,14 +416,13 @@ class ClassCenterSampleCUDAKernel : public framework::OpKernel<T> {
1) * 1) *
vec_size; vec_size;
int device_id = ctx.GetPlace().GetDeviceId(); int device_id = ctx.GetPlace().GetDeviceId();
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id); auto gen_cuda = framework::DefaultCUDAGenerator(device_id);
if (gen_cuda->GetIsInitPy() && (!fix_seed)) { if (!fix_seed) {
auto seed_offset = gen_cuda->IncrementOffset(offset); auto seed_offset = gen_cuda->IncrementOffset(offset);
seed_data = seed_offset.first; seed_data = seed_offset.first;
increment = seed_offset.second; increment = seed_offset.second;
} else { } else {
std::random_device rnd; seed_data = seed + rank;
seed_data = fix_seed ? seed + rank : rnd();
increment = offset; increment = offset;
} }
RandomSampleClassCenter<T><<<NumBlocks(num_classes), kNumCUDAThreads, 0, RandomSampleClassCenter<T><<<NumBlocks(num_classes), kNumCUDAThreads, 0,
......
...@@ -172,17 +172,13 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> { ...@@ -172,17 +172,13 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
int seed = ctx.Attr<int>("seed"); int seed = ctx.Attr<int>("seed");
if (!is_test) { if (!is_test) {
int device_id = ctx.GetPlace().GetDeviceId(); if (seed == 0) {
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id); // If not specify seed, use global Generator to generate seed.
if (gen_cuda->GetIsInitPy() && seed == 0) { int device_id = ctx.GetPlace().GetDeviceId();
// If perform `manual_seed` in python and inner seed is not specified auto gen_cuda = paddle::framework::DefaultCUDAGenerator(device_id);
// (equals 0), use global generator generated seed.
seed = static_cast<int>(gen_cuda->Random64()); seed = static_cast<int>(gen_cuda->Random64());
} else if (seed == 0) { }
// use random generated seed // else use `ctx.Attr<int>("seed")` specified seed
std::random_device rd;
seed = rd();
} // else use `ctx.Attr<int>("seed")` specified seed
} }
bool has_seq_length = ctx.HasInput("SequenceLength"); bool has_seq_length = ctx.HasInput("SequenceLength");
......
...@@ -77,7 +77,7 @@ struct DirichletSampler<platform::CUDADeviceContext, T> { ...@@ -77,7 +77,7 @@ struct DirichletSampler<platform::CUDADeviceContext, T> {
// init state, seed & offset for all threads // init state, seed & offset for all threads
int device_id = ctx.GetPlace().GetDeviceId(); int device_id = ctx.GetPlace().GetDeviceId();
auto p_gen = framework::GetDefaultCUDAGenerator(device_id); auto p_gen = framework::DefaultCUDAGenerator(device_id);
auto seed_and_offset = p_gen->IncrementOffset(10); // hard-coded offset auto seed_and_offset = p_gen->IncrementOffset(10); // hard-coded offset
auto seed = seed_and_offset.first; auto seed = seed_and_offset.first;
auto offset = seed_and_offset.second; auto offset = seed_and_offset.second;
......
...@@ -26,7 +26,7 @@ inline void GetSeedDataAndIncrement(const phi::GPUContext& dev_ctx, ...@@ -26,7 +26,7 @@ inline void GetSeedDataAndIncrement(const phi::GPUContext& dev_ctx,
const int offset, uint64_t* seed_data, const int offset, uint64_t* seed_data,
uint64_t* increment) { uint64_t* increment) {
int device_id = dev_ctx.GetPlace().GetDeviceId(); int device_id = dev_ctx.GetPlace().GetDeviceId();
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id); auto gen_cuda = framework::DefaultCUDAGenerator(device_id);
if (seed) { if (seed) {
framework::Tensor seed_cpu_tensor; framework::Tensor seed_cpu_tensor;
...@@ -34,13 +34,12 @@ inline void GetSeedDataAndIncrement(const phi::GPUContext& dev_ctx, ...@@ -34,13 +34,12 @@ inline void GetSeedDataAndIncrement(const phi::GPUContext& dev_ctx,
&seed_cpu_tensor); &seed_cpu_tensor);
*seed_data = static_cast<uint64_t>(seed_cpu_tensor.data<int>()[0]); *seed_data = static_cast<uint64_t>(seed_cpu_tensor.data<int>()[0]);
*increment = offset; *increment = offset;
} else if (gen_cuda->GetIsInitPy() && (!is_fix_seed)) { } else if (!is_fix_seed) {
auto seed_offset = gen_cuda->IncrementOffset(offset); auto seed_offset = gen_cuda->IncrementOffset(offset);
*seed_data = seed_offset.first; *seed_data = seed_offset.first;
*increment = seed_offset.second; *increment = seed_offset.second;
} else { } else {
std::random_device rnd; *seed_data = seed_val;
*seed_data = is_fix_seed ? seed_val : rnd();
*increment = offset; *increment = offset;
} }
} }
......
...@@ -54,26 +54,21 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> { ...@@ -54,26 +54,21 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
auto* tensor = context.Output<framework::Tensor>("Out"); auto* tensor = context.Output<framework::Tensor>("Out");
T* data = tensor->mutable_data<T>(context.GetPlace()); T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed")); unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
bool seed_flag = false;
if (seed == 0) {
std::random_device rd;
seed = rd();
seed_flag = true;
}
T mean = static_cast<T>(context.Attr<float>("mean")); T mean = static_cast<T>(context.Attr<float>("mean"));
T std = static_cast<T>(context.Attr<float>("std")); T std = static_cast<T>(context.Attr<float>("std"));
int64_t size = tensor->numel(); int64_t size = tensor->numel();
int device_id = context.GetPlace().GetDeviceId(); int device_id = context.GetPlace().GetDeviceId();
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id); auto gen_cuda = framework::DefaultCUDAGenerator(device_id);
auto& dev_cxt = auto& dev_cxt =
context.template device_context<platform::CUDADeviceContext>(); context.template device_context<platform::CUDADeviceContext>();
if (gen_cuda->GetIsInitPy() && seed_flag) { if (seed == 0) {
// use global Generator seed
auto seed_offset = gen_cuda->IncrementOffset(1); auto seed_offset = gen_cuda->IncrementOffset(1);
int64_t gen_offset = size * seed_offset.second; uint64_t seed = seed_offset.first;
auto func = GaussianGenerator<T>(mean, std, seed_offset.first, uint64_t offset = seed_offset.second;
seed_offset.second); auto func = GaussianGenerator<T>(mean, std, seed, size * offset);
phi::IndexKernel<T, GaussianGenerator<T>>(dev_cxt, tensor, func); phi::IndexKernel<T, GaussianGenerator<T>>(dev_cxt, tensor, func);
} else { } else {
auto func = GaussianGenerator<T>(mean, std, seed); auto func = GaussianGenerator<T>(mean, std, seed);
......
...@@ -151,12 +151,6 @@ void UniformRandom(const framework::ExecutionContext& context, ...@@ -151,12 +151,6 @@ void UniformRandom(const framework::ExecutionContext& context,
T* data = tensor->mutable_data<T>(dev_cxt.GetPlace()); T* data = tensor->mutable_data<T>(dev_cxt.GetPlace());
if (size <= 0) return; if (size <= 0) return;
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed")); unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
bool seed_flag = false;
if (seed == 0) {
std::random_device rd;
seed = rd();
seed_flag = true;
}
T min = static_cast<T>(context.Attr<float>("min")); T min = static_cast<T>(context.Attr<float>("min"));
T max = static_cast<T>(context.Attr<float>("max")); T max = static_cast<T>(context.Attr<float>("max"));
...@@ -165,14 +159,15 @@ void UniformRandom(const framework::ExecutionContext& context, ...@@ -165,14 +159,15 @@ void UniformRandom(const framework::ExecutionContext& context,
unsigned int diag_step = unsigned int diag_step =
static_cast<unsigned int>(context.Attr<int>("diag_step")); static_cast<unsigned int>(context.Attr<int>("diag_step"));
T diag_val = static_cast<T>(context.Attr<float>("diag_val")); T diag_val = static_cast<T>(context.Attr<float>("diag_val"));
int device_id = context.GetPlace().GetDeviceId();
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id); if (seed == 0) {
if (gen_cuda->GetIsInitPy() && seed_flag) { // Use global Generator seed
using MT = typename details::MPTypeTrait<T>::Type; using MT = typename details::MPTypeTrait<T>::Type;
phi::funcs::uniform_distribution<MT> dist; phi::funcs::uniform_distribution<MT> dist;
phi::funcs::uniform_real_transform<MT> trans(min, max); phi::funcs::uniform_real_transform<MT> trans(min, max);
phi::funcs::distribution_and_transform<T>(dev_cxt, tensor, dist, trans); phi::funcs::distribution_and_transform<T>(dev_cxt, tensor, dist, trans);
} else { } else {
// Use OP seed
auto func = auto func =
UniformGenerator<T>(min, max, seed, diag_num, diag_step, diag_val); UniformGenerator<T>(min, max, seed, diag_num, diag_step, diag_val);
phi::IndexKernel<T, UniformGenerator<T>>(dev_cxt, tensor, func); phi::IndexKernel<T, UniformGenerator<T>>(dev_cxt, tensor, func);
......
...@@ -169,7 +169,7 @@ inline void EmplaceDeviceContext( ...@@ -169,7 +169,7 @@ inline void EmplaceDeviceContext(
cuda_ctx->PartialInitWithAllocator(); cuda_ctx->PartialInitWithAllocator();
dev_ctx->SetGenerator( dev_ctx->SetGenerator(
framework::GetDefaultCUDAGenerator(p.GetDeviceId()).get()); framework::DefaultCUDAGenerator(p.GetDeviceId()).get());
#endif #endif
} else { } else {
dev_ctx->SetAllocator(memory::allocation::AllocatorFacade::Instance() dev_ctx->SetAllocator(memory::allocation::AllocatorFacade::Instance()
......
...@@ -55,13 +55,9 @@ void BindGenerator(py::module* m_ptr) { ...@@ -55,13 +55,9 @@ void BindGenerator(py::module* m_ptr) {
}) })
.def("seed", &framework::Generator::Seed) .def("seed", &framework::Generator::Seed)
.def("initial_seed", &framework::Generator::GetCurrentSeed) .def("initial_seed", &framework::Generator::GetCurrentSeed)
.def("random", &framework::Generator::Random64) .def("random", &framework::Generator::Random64);
// .def("get_cpu_engine", &framework::Generator::GetCPUEngine)
// .def("set_cpu_engine", &framework::Generator::SetCPUEngine)
.def_property("_is_init_py", &framework::Generator::GetIsInitPy,
&framework::Generator::SetIsInitPy);
m.def("default_cpu_generator", &framework::DefaultCPUGenerator); m.def("default_cpu_generator", &framework::DefaultCPUGenerator);
m.def("default_cuda_generator", &framework::GetDefaultCUDAGenerator); m.def("default_cuda_generator", &framework::DefaultCUDAGenerator);
m.def("set_random_seed_generator", &framework::SetRandomSeedGenerator); m.def("set_random_seed_generator", &framework::SetRandomSeedGenerator);
m.def("get_random_seed_generator", &framework::GetRandomSeedGenerator); m.def("get_random_seed_generator", &framework::GetRandomSeedGenerator);
} }
......
...@@ -49,12 +49,6 @@ class Generator { ...@@ -49,12 +49,6 @@ class Generator {
virtual std::pair<uint64_t, uint64_t> IncrementOffset( virtual std::pair<uint64_t, uint64_t> IncrementOffset(
uint64_t increament_offset) = 0; uint64_t increament_offset) = 0;
// NOTE(zhiqiu): is_init_py_ is used to make generator be compatible with
// old seed, and it should be removed after all random-related operators
// and unittests upgrades to use generator.
virtual void SetIsInitPy(bool) = 0;
virtual bool GetIsInitPy() const = 0;
virtual uint64_t get_device_id() = 0; virtual uint64_t get_device_id() = 0;
}; };
......
...@@ -59,34 +59,20 @@ void GaussianRandomKernel(const Context& dev_ctx, ...@@ -59,34 +59,20 @@ void GaussianRandomKernel(const Context& dev_ctx,
int seed, int seed,
DataType dtype, DataType dtype,
DenseTensor* out) { DenseTensor* out) {
auto tensor = out; out->Resize(phi::make_ddim(shape.GetData()));
dev_ctx.template Alloc<T>(out);
bool seed_flag = false;
if (seed == 0) { if (seed == 0) {
std::random_device rd; // use global Generator seed
seed = rd();
seed_flag = true;
}
tensor->Resize(phi::make_ddim(shape.GetData()));
T* data = dev_ctx.template Alloc<T>(tensor);
int64_t size = tensor->numel();
int device_id = dev_ctx.GetPlace().GetDeviceId();
auto gen_cuda = paddle::framework::GetDefaultCUDAGenerator(device_id);
if (gen_cuda->GetIsInitPy() && seed_flag) {
using MT = typename phi::dtype::MPTypeTrait<T>::Type; using MT = typename phi::dtype::MPTypeTrait<T>::Type;
funcs::normal_distribution<MT> dist; funcs::normal_distribution<MT> dist;
funcs::normal_transform<MT> trans(static_cast<MT>(mean), funcs::normal_transform<MT> trans(static_cast<MT>(mean),
static_cast<MT>(std)); static_cast<MT>(std));
funcs::distribution_and_transform<T>(dev_ctx, tensor, dist, trans); funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);
} else { } else {
// use OP seed
auto func = auto func =
GaussianGenerator<T>(static_cast<T>(mean), static_cast<T>(std), seed); GaussianGenerator<T>(static_cast<T>(mean), static_cast<T>(std), seed);
IndexKernel<T, GaussianGenerator<T>>(dev_ctx, tensor, func); IndexKernel<T, GaussianGenerator<T>>(dev_ctx, out, func);
} }
} }
......
...@@ -27,12 +27,9 @@ ...@@ -27,12 +27,9 @@
namespace cub = hipcub; namespace cub = hipcub;
#endif #endif
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/random.h>
#include <thrust/transform.h>
#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
namespace phi { namespace phi {
...@@ -144,27 +141,21 @@ struct GumbleNoiseGenerator<GPUContext, T> { ...@@ -144,27 +141,21 @@ struct GumbleNoiseGenerator<GPUContext, T> {
DenseTensor random_tensor; DenseTensor random_tensor;
int64_t size = size_to_axis * size_from_axis; int64_t size = size_to_axis * size_from_axis;
random_tensor.Resize(make_ddim({size})); random_tensor.Resize(make_ddim({size}));
auto* random_data = ctx.template Alloc<T>(&random_tensor); T* random_data = ctx.template Alloc<T>(&random_tensor);
thrust::counting_iterator<int64_t> index_sequence_begin(0);
// generate gumbel noise // generate gumbel noise
int device_id = ctx.GetPlace().GetDeviceId(); int device_id = ctx.GetPlace().GetDeviceId();
auto gen_cuda = paddle::framework::GetDefaultCUDAGenerator(device_id); auto gen_cuda = ctx.GetGenerator();
if (gen_cuda->GetIsInitPy()) {
auto seed_offset = gen_cuda->IncrementOffset(1); auto seed_offset = gen_cuda->IncrementOffset(1);
int64_t gen_offset = size * seed_offset.second; uint64_t seed = seed_offset.first;
thrust::transform( uint64_t offset = seed_offset.second;
index_sequence_begin,
index_sequence_begin + size, thrust::counting_iterator<int64_t> index_sequence_begin(0);
thrust::device_ptr<T>(random_data), thrust::transform(index_sequence_begin,
UniformCUDAGenerator<T>(0.00001, 1, seed_offset.first, gen_offset)); index_sequence_begin + size,
} else { thrust::device_ptr<T>(random_data),
const unsigned int seed = std::random_device()(); UniformCUDAGenerator<T>(0.00001, 1, seed, size * offset));
thrust::transform(index_sequence_begin,
index_sequence_begin + size,
thrust::device_ptr<T>(random_data),
UniformCUDAGenerator<T>(0.00001, 1, seed));
}
// add gumbel noise to X // add gumbel noise to X
const int thread_size = 512; const int thread_size = 512;
......
...@@ -175,17 +175,13 @@ void RnnKernel(const Context &dev_ctx, ...@@ -175,17 +175,13 @@ void RnnKernel(const Context &dev_ctx,
mode)); mode));
if (!is_test) { if (!is_test) {
int device_id = dev_ctx.GetPlace().GetDeviceId(); if (seed == 0) {
auto gen_cuda = paddle::framework::GetDefaultCUDAGenerator(device_id); // If not specify seed, use global Generator to generate seed.
if (gen_cuda->GetIsInitPy() && seed == 0) { int device_id = dev_ctx.GetPlace().GetDeviceId();
// If perform `manual_seed` in python and inner seed is not specified auto gen_cuda = paddle::framework::DefaultCUDAGenerator(device_id);
// (equals 0), use global generator generated seed.
seed = static_cast<int>(gen_cuda->Random64()); seed = static_cast<int>(gen_cuda->Random64());
} else if (seed == 0) { }
// use random generated seed // else use `ctx.Attr<int>("seed")` specified seed
std::random_device rd;
seed = rd();
} // else use `ctx.Attr<int>("seed")` specified seed
} }
const T *x_data = x.data<T>(); const T *x_data = x.data<T>();
......
...@@ -90,34 +90,25 @@ void TruncatedGaussianRandomKernel(const Context& dev_ctx, ...@@ -90,34 +90,25 @@ void TruncatedGaussianRandomKernel(const Context& dev_ctx,
int seed, int seed,
DataType dtype, DataType dtype,
DenseTensor* out) { DenseTensor* out) {
auto tensor = out; T* data = dev_ctx.template Alloc<T>(out);
T* data = dev_ctx.template Alloc<T>(tensor);
bool seed_flag = false;
if (seed == 0) {
std::random_device rd;
seed = rd();
seed_flag = true;
}
thrust::counting_iterator<int64_t> index_sequence_begin(0); thrust::counting_iterator<int64_t> index_sequence_begin(0);
int64_t size = tensor->numel(); int64_t size = out->numel();
auto gen_cuda = dev_ctx.GetGenerator(); auto gen_cuda = dev_ctx.GetGenerator();
if (seed == 0) {
if (gen_cuda->GetIsInitPy() && seed_flag) { // use global Generator seed
auto seed_offset = gen_cuda->IncrementOffset(1); auto seed_offset = gen_cuda->IncrementOffset(1);
int64_t gen_offset = size * seed_offset.second; uint64_t seed = seed_offset.first;
thrust::transform(index_sequence_begin, uint64_t offset = seed_offset.second;
index_sequence_begin + size, thrust::transform(
thrust::device_ptr<T>(data), index_sequence_begin,
TruncatedNormalOffset<T>(mean, index_sequence_begin + size,
std, thrust::device_ptr<T>(data),
std::numeric_limits<T>::min(), TruncatedNormalOffset<T>(
seed_offset.first, mean, std, std::numeric_limits<T>::min(), seed, size * offset));
gen_offset));
} else { } else {
// use OP seed
thrust::transform( thrust::transform(
index_sequence_begin, index_sequence_begin,
index_sequence_begin + size, index_sequence_begin + size,
......
...@@ -65,22 +65,15 @@ void UniformRandomRawKernel(const Context& dev_ctx, ...@@ -65,22 +65,15 @@ void UniformRandomRawKernel(const Context& dev_ctx,
float diag_val, float diag_val,
DenseTensor* out) { DenseTensor* out) {
out->Resize(phi::make_ddim(shape.GetData())); out->Resize(phi::make_ddim(shape.GetData()));
T* data = dev_ctx.template Alloc<T>(out); dev_ctx.template Alloc<T>(out);
auto size = out->numel();
bool seed_flag = false;
if (seed == 0) { if (seed == 0) {
std::random_device rd; // Use global Generator seed
seed = rd();
seed_flag = true;
}
auto generator = dev_ctx.GetGenerator();
if (generator->GetIsInitPy() && seed_flag) {
using MT = typename kps::details::MPTypeTrait<T>::Type; using MT = typename kps::details::MPTypeTrait<T>::Type;
funcs::uniform_distribution<MT> dist; funcs::uniform_distribution<MT> dist;
funcs::uniform_real_transform<MT> trans(min, max); funcs::uniform_real_transform<MT> trans(min, max);
funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans); funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);
} else { } else {
// Use OP seed
auto func = auto func =
UniformGenerator<T>(min, max, seed, diag_num, diag_step, diag_val); UniformGenerator<T>(min, max, seed, diag_num, diag_step, diag_val);
IndexKernel<T, UniformGenerator<T>>(dev_ctx, out, func); IndexKernel<T, UniformGenerator<T>>(dev_ctx, out, func);
......
...@@ -25,6 +25,8 @@ import paddle ...@@ -25,6 +25,8 @@ import paddle
import paddle.fluid.core as core import paddle.fluid.core as core
@unittest.skipIf(not core.is_compiled_with_cuda(),
"Only test cuda Random Generator")
class TestGeneratorSeed(unittest.TestCase): class TestGeneratorSeed(unittest.TestCase):
""" """
Test cases for cpu generator seed. Test cases for cpu generator seed.
...@@ -70,15 +72,13 @@ class TestGeneratorSeed(unittest.TestCase): ...@@ -70,15 +72,13 @@ class TestGeneratorSeed(unittest.TestCase):
"""Test Generator seed.""" """Test Generator seed."""
fluid.enable_dygraph() fluid.enable_dygraph()
paddle.seed(12312321111) st = paddle.get_cuda_rng_state()
x = fluid.layers.gaussian_random([120], dtype="float32") x1 = paddle.randn([120], dtype="float32")
st1 = paddle.get_cuda_rng_state() paddle.set_cuda_rng_state(st)
x1 = fluid.layers.gaussian_random([120], dtype="float32") x2 = paddle.randn([120], dtype="float32")
paddle.set_cuda_rng_state(st1) paddle.set_cuda_rng_state(st)
x2 = fluid.layers.gaussian_random([120], dtype="float32") x3 = paddle.randn([120], dtype="float32")
paddle.seed(12312321111)
x3 = fluid.layers.gaussian_random([120], dtype="float32")
x_np = x.numpy()
x1_np = x1.numpy() x1_np = x1.numpy()
x2_np = x2.numpy() x2_np = x2.numpy()
x3_np = x3.numpy() x3_np = x3.numpy()
...@@ -86,7 +86,7 @@ class TestGeneratorSeed(unittest.TestCase): ...@@ -86,7 +86,7 @@ class TestGeneratorSeed(unittest.TestCase):
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
print(">>>>>>> gaussian random dygraph >>>>>>>") print(">>>>>>> gaussian random dygraph >>>>>>>")
self.assertTrue(np.allclose(x1_np, x2_np)) self.assertTrue(np.allclose(x1_np, x2_np))
self.assertTrue(np.allclose(x_np, x3_np)) self.assertTrue(np.allclose(x2_np, x3_np))
def test_generator_randint_dygraph(self): def test_generator_randint_dygraph(self):
"""Test Generator seed.""" """Test Generator seed."""
......
...@@ -629,7 +629,6 @@ class ModuleApiTest(unittest.TestCase): ...@@ -629,7 +629,6 @@ class ModuleApiTest(unittest.TestCase):
else: else:
fluid.disable_dygraph() fluid.disable_dygraph()
gen = paddle.seed(self._random_seed) gen = paddle.seed(self._random_seed)
gen._is_init_py = False
paddle.framework.random._manual_program_seed(self._random_seed) paddle.framework.random._manual_program_seed(self._random_seed)
scope = fluid.core.Scope() scope = fluid.core.Scope()
with fluid.scope_guard(scope): with fluid.scope_guard(scope):
......
...@@ -178,7 +178,6 @@ class TestUniformRandomOpAPISeed(unittest.TestCase): ...@@ -178,7 +178,6 @@ class TestUniformRandomOpAPISeed(unittest.TestCase):
def test_attr_tensor_API(self): def test_attr_tensor_API(self):
_seed = 10 _seed = 10
gen = paddle.seed(_seed) gen = paddle.seed(_seed)
gen._is_init_py = False
startup_program = fluid.Program() startup_program = fluid.Program()
train_program = fluid.Program() train_program = fluid.Program()
with fluid.program_guard(train_program, startup_program): with fluid.program_guard(train_program, startup_program):
......
...@@ -370,7 +370,6 @@ class TestUniformRandomOp_API_seed(unittest.TestCase): ...@@ -370,7 +370,6 @@ class TestUniformRandomOp_API_seed(unittest.TestCase):
def test_attr_tensor_API(self): def test_attr_tensor_API(self):
_seed = 10 _seed = 10
gen = paddle.seed(_seed) gen = paddle.seed(_seed)
gen._is_init_py = False
startup_program = fluid.Program() startup_program = fluid.Program()
train_program = fluid.Program() train_program = fluid.Program()
with fluid.program_guard(train_program, startup_program): with fluid.program_guard(train_program, startup_program):
......
...@@ -44,10 +44,8 @@ def seed(seed): ...@@ -44,10 +44,8 @@ def seed(seed):
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
for i in range(core.get_cuda_device_count()): for i in range(core.get_cuda_device_count()):
core.default_cuda_generator(i)._is_init_py = True
core.default_cuda_generator(i).manual_seed(seed) core.default_cuda_generator(i).manual_seed(seed)
core.default_cpu_generator()._is_init_py = True
return core.default_cpu_generator().manual_seed(seed) return core.default_cpu_generator().manual_seed(seed)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册