Unverified commit 3b488bae, authored by zhouweiwei2014, committed by GitHub

remove is_init_py of RandomGenerator, and use Global RandomGenerator by default (#42876)

* remove is_init_py of RandomGenerator, and use the global Generator when no OP seed is set

* fix comment
Parent 2b4977f2
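In effect, after this change seeding the global generator is all that is needed; no _is_init_py flag has to be toggled first. A minimal sketch of the resulting behaviour (assuming a Paddle build that includes this PR):

    import paddle

    paddle.seed(12312321111)                  # seeds the CPU and all CUDA generators
    x1 = paddle.randn([4], dtype="float32")   # draws from the global generator
    paddle.seed(12312321111)
    x2 = paddle.randn([4], dtype="float32")
    assert (x1.numpy() == x2.numpy()).all()   # identical seeds -> identical draws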
......@@ -24,7 +24,7 @@ limitations under the License. */
namespace paddle {
namespace framework {
const std::shared_ptr<Generator>& GetDefaultCUDAGenerator(int64_t device_id) {
const std::shared_ptr<Generator>& DefaultCUDAGenerator(int64_t device_id) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
static int64_t num_cuda_devices = -1;
......@@ -58,8 +58,6 @@ const std::shared_ptr<Generator>& GetDefaultCUDAGenerator(int64_t device_id) {
const std::shared_ptr<Generator>& DefaultCPUGenerator() {
static auto default_cpu_generator =
std::make_shared<Generator>(GetRandomSeed());
VLOG(4) << "initial seed: " << default_cpu_generator->GetCurrentSeed()
<< ", cpu engine: " << default_cpu_generator->GetCPUEngine().get();
return default_cpu_generator;
}
......@@ -100,19 +98,13 @@ const std::shared_ptr<Generator>& GetRandomSeedGenerator(
return iter->second;
}
std::shared_ptr<std::mt19937_64> OpDefaultCPUEngine() {
static auto op_default_cpu_engine = std::make_shared<std::mt19937_64>();
return op_default_cpu_engine;
}
// NOTE(zhiqiu): there are 3 conditions:
// (1) op seed is not set and DefaultCPUGenerator is inited, use
// DefaultCPUGenerator
// (2) op seed is not set and DefaultCPUGenerator is not inited, use se
// OpDefaultCPUEngine() and set a radnom seed
// (3) op seed is set, use OpDefaultCPUEngine() and set the seed
// There are 3 conditions:
// (1) op seed is set, use op seed.
// (2) op seed is not set, global seed is set, use global seed.
// (3) op seed is not set and global seed is not set either; use a random
// seed from RandomGenerator.
std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t seed) {
if (DefaultCPUGenerator()->GetIsInitPy() && seed == 0) {
if (seed == 0) {
VLOG(4) << "Use random engine from generator";
return DefaultCPUGenerator()->GetCPUEngine();
} else {
......@@ -123,12 +115,6 @@ std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t seed) {
//
// And we need to measure the determinacy of Generator in PE.
auto engine = std::make_shared<std::mt19937_64>();
if (seed == 0) {
seed = GetRandomSeed();
VLOG(4) << "Use default random engine with random seed = " << seed;
} else {
VLOG(4) << "Use default random engine with fixed random seed = " << seed;
}
static std::mutex mu_;
{
std::lock_guard<std::mutex> lock(mu_);
......@@ -204,11 +190,5 @@ std::pair<uint64_t, uint64_t> Generator::IncrementOffset(
#endif
}
void Generator::SetIsInitPy(bool is_init_py) {
this->is_init_py_ = is_init_py;
VLOG(4) << "SetIsInitPy:" << this->is_init_py_;
}
bool Generator::GetIsInitPy() const { return this->is_init_py_; }
} // namespace framework
} // namespace paddle
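With GetIsInitPy gone, the engine selection in GetCPURandomEngine collapses to a single test on the OP seed. A pure-Python model of the rule (names illustrative; random.Random stands in for std::mt19937_64):

    import random

    GLOBAL_ENGINE = random.Random(2024)   # stands in for DefaultCPUGenerator()

    def get_cpu_random_engine(op_seed):
        if op_seed == 0:
            # no OP seed: share the global engine, so paddle.seed() controls it
            return GLOBAL_ENGINE
        # OP seed set: dedicated engine, decoupled from the global state
        return random.Random(op_seed)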
......@@ -59,7 +59,6 @@ struct Generator : public phi::Generator {
this->engine_ = engine;
VLOG(4) << "initial seed: " << this->state_.current_seed
<< ", cpu engine: " << &this->state_.cpu_engine;
this->is_init_py_ = true; // TODO(zhiqiu): remove it in future
}
Generator(uint64_t seed, uint64_t device_id) {
std::seed_seq seq({seed});
......@@ -71,7 +70,6 @@ struct Generator : public phi::Generator {
this->engine_ = engine;
VLOG(4) << "initial seed: " << this->state_.current_seed
<< ", cpu engine: " << &this->state_.cpu_engine;
this->is_init_py_ = false; // TODO(zhiqiu): remove it in future
}
Generator(const Generator& other) = delete;
......@@ -95,32 +93,21 @@ struct Generator : public phi::Generator {
std::pair<uint64_t, uint64_t> IncrementOffset(uint64_t increament_offset);
void SetIsInitPy(bool);
bool GetIsInitPy() const;
uint64_t get_device_id() { return this->state_.device; }
private:
phi::Generator::GeneratorState state_;
std::shared_ptr<std::mt19937_64> engine_;
mutable std::mutex mu_;
// NOTE(zhiqiu): is_init_py_ is used to make generator be compatible with
// old seed, and it should be removed after all random-related operators
// and unittests upgrades to use generator.
bool is_init_py_ = false;
};
// The DefaultCPUGenerator is used in manual_seed()
const std::shared_ptr<Generator>& DefaultCPUGenerator();
// If op seed is set or global is not set, the OpDefaultCPUEngine is used.
std::shared_ptr<std::mt19937_64> OpDefaultCPUEngine();
const std::shared_ptr<Generator>& DefaultCUDAGenerator(int64_t device_id = -1);
std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t);
const std::shared_ptr<Generator>& GetDefaultCUDAGenerator(
int64_t device_id = -1);
const std::shared_ptr<Generator>& SetRandomSeedGenerator(
const std::string& name, uint64_t seed);
......
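IncrementOffset is the piece the CUDA kernels below rely on: it hands back the current (seed, offset) pair and then advances the offset, so successive launches consume disjoint slices of the counter-based (Philox-style) sequence. A toy model of that contract (illustrative only):

    class CounterGenerator:
        def __init__(self, seed):
            self.seed, self.offset = seed, 0

        def increment_offset(self, n):
            pair = (self.seed, self.offset)
            self.offset += n   # the next caller gets a disjoint counter range
            return pair

    gen = CounterGenerator(seed=42)
    assert gen.increment_offset(10) == (42, 0)
    assert gen.increment_offset(10) == (42, 10)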
......@@ -416,14 +416,13 @@ class ClassCenterSampleCUDAKernel : public framework::OpKernel<T> {
1) *
vec_size;
int device_id = ctx.GetPlace().GetDeviceId();
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
if (gen_cuda->GetIsInitPy() && (!fix_seed)) {
auto gen_cuda = framework::DefaultCUDAGenerator(device_id);
if (!fix_seed) {
auto seed_offset = gen_cuda->IncrementOffset(offset);
seed_data = seed_offset.first;
increment = seed_offset.second;
} else {
std::random_device rnd;
seed_data = fix_seed ? seed + rank : rnd();
seed_data = seed + rank;
increment = offset;
}
RandomSampleClassCenter<T><<<NumBlocks(num_classes), kNumCUDAThreads, 0,
......
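In the fixed-seed branch above, each rank derives a distinct but reproducible stream from seed + rank. A small pure-Python stand-in:

    import random

    def rank_stream(seed, rank):
        return random.Random(seed + rank)   # fixed-seed path: per-rank streams

    assert rank_stream(10, 0).random() != rank_stream(10, 1).random()  # distinct
    assert rank_stream(10, 2).random() == rank_stream(10, 2).random()  # reproducible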
......@@ -172,17 +172,13 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
int seed = ctx.Attr<int>("seed");
if (!is_test) {
if (seed == 0) {
// If seed is not specified, use the global Generator to generate a seed.
int device_id = ctx.GetPlace().GetDeviceId();
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
if (gen_cuda->GetIsInitPy() && seed == 0) {
// If perform `manual_seed` in python and inner seed is not specified
// (equals 0), use global generator generated seed.
auto gen_cuda = paddle::framework::DefaultCUDAGenerator(device_id);
seed = static_cast<int>(gen_cuda->Random64());
} else if (seed == 0) {
// use random generated seed
std::random_device rd;
seed = rd();
} // else use `ctx.Attr<int>("seed")` specified seed
}
// else use `ctx.Attr<int>("seed")` specified seed
}
bool has_seq_length = ctx.HasInput("SequenceLength");
......
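An unspecified seed is now always drawn from the global CUDA generator instead of std::random_device, which makes it controllable from Python. A sketch via the bindings (assumes a CUDA build; manual_seed is the same binding paddle.seed uses below):

    import paddle.fluid.core as core

    gen = core.default_cuda_generator(0)   # DefaultCUDAGenerator(0) under the hood
    gen.manual_seed(123)
    derived = gen.random()                 # what the kernel's Random64() would return
    gen.manual_seed(123)
    assert gen.random() == derived         # re-seeding reproduces the derived op seed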
......@@ -77,7 +77,7 @@ struct DirichletSampler<platform::CUDADeviceContext, T> {
// init state, seed & offset for all threads
int device_id = ctx.GetPlace().GetDeviceId();
auto p_gen = framework::GetDefaultCUDAGenerator(device_id);
auto p_gen = framework::DefaultCUDAGenerator(device_id);
auto seed_and_offset = p_gen->IncrementOffset(10); // hard-coded offset
auto seed = seed_and_offset.first;
auto offset = seed_and_offset.second;
......
......@@ -26,7 +26,7 @@ inline void GetSeedDataAndIncrement(const phi::GPUContext& dev_ctx,
const int offset, uint64_t* seed_data,
uint64_t* increment) {
int device_id = dev_ctx.GetPlace().GetDeviceId();
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
auto gen_cuda = framework::DefaultCUDAGenerator(device_id);
if (seed) {
framework::Tensor seed_cpu_tensor;
......@@ -34,13 +34,12 @@ inline void GetSeedDataAndIncrement(const phi::GPUContext& dev_ctx,
&seed_cpu_tensor);
*seed_data = static_cast<uint64_t>(seed_cpu_tensor.data<int>()[0]);
*increment = offset;
} else if (gen_cuda->GetIsInitPy() && (!is_fix_seed)) {
} else if (!is_fix_seed) {
auto seed_offset = gen_cuda->IncrementOffset(offset);
*seed_data = seed_offset.first;
*increment = seed_offset.second;
} else {
std::random_device rnd;
*seed_data = is_fix_seed ? seed_val : rnd();
*seed_data = seed_val;
*increment = offset;
}
}
......
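GetSeedDataAndIncrement now has exactly three outcomes, all explicit. A compact model, reusing the CounterGenerator toy from earlier (all names illustrative):

    def get_seed_data_and_increment(seed_tensor, is_fix_seed, seed_val, offset, gen):
        if seed_tensor is not None:   # (1) seed passed in as a tensor input
            return int(seed_tensor), offset
        if not is_fix_seed:           # (2) no fixed seed: ask the global generator
            return gen.increment_offset(offset)
        return seed_val, offset       # (3) fixed seed from the op attribute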
......@@ -54,26 +54,21 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
auto* tensor = context.Output<framework::Tensor>("Out");
T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
bool seed_flag = false;
if (seed == 0) {
std::random_device rd;
seed = rd();
seed_flag = true;
}
T mean = static_cast<T>(context.Attr<float>("mean"));
T std = static_cast<T>(context.Attr<float>("std"));
int64_t size = tensor->numel();
int device_id = context.GetPlace().GetDeviceId();
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
auto gen_cuda = framework::DefaultCUDAGenerator(device_id);
auto& dev_cxt =
context.template device_context<platform::CUDADeviceContext>();
if (gen_cuda->GetIsInitPy() && seed_flag) {
if (seed == 0) {
// use global Generator seed
auto seed_offset = gen_cuda->IncrementOffset(1);
int64_t gen_offset = size * seed_offset.second;
auto func = GaussianGenerator<T>(mean, std, seed_offset.first,
seed_offset.second);
uint64_t seed = seed_offset.first;
uint64_t offset = seed_offset.second;
auto func = GaussianGenerator<T>(mean, std, seed, size * offset);
phi::IndexKernel<T, GaussianGenerator<T>>(dev_cxt, tensor, func);
} else {
auto func = GaussianGenerator<T>(mean, std, seed);
......
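The two branches above are now selected purely by seed == 0. A NumPy stand-in for the pattern (np.random is only a proxy for the CUDA Philox engine; names illustrative):

    import numpy as np

    def gaussian_like(size, mean, std, op_seed, gen):
        if op_seed == 0:                        # global Generator path
            seed, offset = gen.increment_offset(1)
            rng = np.random.default_rng([seed, offset])
        else:                                   # OP-seed path
            rng = np.random.default_rng(op_seed)
        return rng.normal(mean, std, size)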
......@@ -151,12 +151,6 @@ void UniformRandom(const framework::ExecutionContext& context,
T* data = tensor->mutable_data<T>(dev_cxt.GetPlace());
if (size <= 0) return;
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
bool seed_flag = false;
if (seed == 0) {
std::random_device rd;
seed = rd();
seed_flag = true;
}
T min = static_cast<T>(context.Attr<float>("min"));
T max = static_cast<T>(context.Attr<float>("max"));
......@@ -165,14 +159,15 @@ void UniformRandom(const framework::ExecutionContext& context,
unsigned int diag_step =
static_cast<unsigned int>(context.Attr<int>("diag_step"));
T diag_val = static_cast<T>(context.Attr<float>("diag_val"));
int device_id = context.GetPlace().GetDeviceId();
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
if (gen_cuda->GetIsInitPy() && seed_flag) {
if (seed == 0) {
// Use global Generator seed
using MT = typename details::MPTypeTrait<T>::Type;
phi::funcs::uniform_distribution<MT> dist;
phi::funcs::uniform_real_transform<MT> trans(min, max);
phi::funcs::distribution_and_transform<T>(dev_cxt, tensor, dist, trans);
} else {
// Use OP seed
auto func =
UniformGenerator<T>(min, max, seed, diag_num, diag_step, diag_val);
phi::IndexKernel<T, UniformGenerator<T>>(dev_cxt, tensor, func);
......
......@@ -169,7 +169,7 @@ inline void EmplaceDeviceContext(
cuda_ctx->PartialInitWithAllocator();
dev_ctx->SetGenerator(
framework::GetDefaultCUDAGenerator(p.GetDeviceId()).get());
framework::DefaultCUDAGenerator(p.GetDeviceId()).get());
#endif
} else {
dev_ctx->SetAllocator(memory::allocation::AllocatorFacade::Instance()
......
......@@ -55,13 +55,9 @@ void BindGenerator(py::module* m_ptr) {
})
.def("seed", &framework::Generator::Seed)
.def("initial_seed", &framework::Generator::GetCurrentSeed)
.def("random", &framework::Generator::Random64)
// .def("get_cpu_engine", &framework::Generator::GetCPUEngine)
// .def("set_cpu_engine", &framework::Generator::SetCPUEngine)
.def_property("_is_init_py", &framework::Generator::GetIsInitPy,
&framework::Generator::SetIsInitPy);
.def("random", &framework::Generator::Random64);
m.def("default_cpu_generator", &framework::DefaultCPUGenerator);
m.def("default_cuda_generator", &framework::GetDefaultCUDAGenerator);
m.def("default_cuda_generator", &framework::DefaultCUDAGenerator);
m.def("set_random_seed_generator", &framework::SetRandomSeedGenerator);
m.def("get_random_seed_generator", &framework::GetRandomSeedGenerator);
}
......
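The trimmed bindings still cover everything the Python side needs; the _is_init_py property is simply gone. Usage straight from the remaining bindings:

    import paddle.fluid.core as core

    cpu_gen = core.default_cpu_generator()
    cpu_gen.manual_seed(42)
    assert cpu_gen.initial_seed() == 42
    r1 = cpu_gen.random()
    cpu_gen.manual_seed(42)
    assert cpu_gen.random() == r1   # seeding alone is enough now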
......@@ -49,12 +49,6 @@ class Generator {
virtual std::pair<uint64_t, uint64_t> IncrementOffset(
uint64_t increament_offset) = 0;
// NOTE(zhiqiu): is_init_py_ is used to make generator be compatible with
// old seed, and it should be removed after all random-related operators
// and unittests upgrades to use generator.
virtual void SetIsInitPy(bool) = 0;
virtual bool GetIsInitPy() const = 0;
virtual uint64_t get_device_id() = 0;
};
......
......@@ -59,34 +59,20 @@ void GaussianRandomKernel(const Context& dev_ctx,
int seed,
DataType dtype,
DenseTensor* out) {
auto tensor = out;
bool seed_flag = false;
out->Resize(phi::make_ddim(shape.GetData()));
dev_ctx.template Alloc<T>(out);
if (seed == 0) {
std::random_device rd;
seed = rd();
seed_flag = true;
}
tensor->Resize(phi::make_ddim(shape.GetData()));
T* data = dev_ctx.template Alloc<T>(tensor);
int64_t size = tensor->numel();
int device_id = dev_ctx.GetPlace().GetDeviceId();
auto gen_cuda = paddle::framework::GetDefaultCUDAGenerator(device_id);
if (gen_cuda->GetIsInitPy() && seed_flag) {
// use global Generator seed
using MT = typename phi::dtype::MPTypeTrait<T>::Type;
funcs::normal_distribution<MT> dist;
funcs::normal_transform<MT> trans(static_cast<MT>(mean),
static_cast<MT>(std));
funcs::distribution_and_transform<T>(dev_ctx, tensor, dist, trans);
funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);
} else {
// use OP seed
auto func =
GaussianGenerator<T>(static_cast<T>(mean), static_cast<T>(std), seed);
IndexKernel<T, GaussianGenerator<T>>(dev_ctx, tensor, func);
IndexKernel<T, GaussianGenerator<T>>(dev_ctx, out, func);
}
}
......
......@@ -27,12 +27,9 @@
namespace cub = hipcub;
#endif
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/random.h>
#include <thrust/transform.h>
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace phi {
......@@ -144,27 +141,21 @@ struct GumbleNoiseGenerator<GPUContext, T> {
DenseTensor random_tensor;
int64_t size = size_to_axis * size_from_axis;
random_tensor.Resize(make_ddim({size}));
auto* random_data = ctx.template Alloc<T>(&random_tensor);
thrust::counting_iterator<int64_t> index_sequence_begin(0);
T* random_data = ctx.template Alloc<T>(&random_tensor);
// generate gumbel noise
int device_id = ctx.GetPlace().GetDeviceId();
auto gen_cuda = paddle::framework::GetDefaultCUDAGenerator(device_id);
if (gen_cuda->GetIsInitPy()) {
auto gen_cuda = ctx.GetGenerator();
auto seed_offset = gen_cuda->IncrementOffset(1);
int64_t gen_offset = size * seed_offset.second;
thrust::transform(
index_sequence_begin,
index_sequence_begin + size,
thrust::device_ptr<T>(random_data),
UniformCUDAGenerator<T>(0.00001, 1, seed_offset.first, gen_offset));
} else {
const unsigned int seed = std::random_device()();
uint64_t seed = seed_offset.first;
uint64_t offset = seed_offset.second;
thrust::counting_iterator<int64_t> index_sequence_begin(0);
thrust::transform(index_sequence_begin,
index_sequence_begin + size,
thrust::device_ptr<T>(random_data),
UniformCUDAGenerator<T>(0.00001, 1, seed));
}
UniformCUDAGenerator<T>(0.00001, 1, seed, size * offset));
// add gumbel noise to X
const int thread_size = 512;
......
......@@ -175,17 +175,13 @@ void RnnKernel(const Context &dev_ctx,
mode));
if (!is_test) {
if (seed == 0) {
// If seed is not specified, use the global Generator to generate a seed.
int device_id = dev_ctx.GetPlace().GetDeviceId();
auto gen_cuda = paddle::framework::GetDefaultCUDAGenerator(device_id);
if (gen_cuda->GetIsInitPy() && seed == 0) {
// If perform `manual_seed` in python and inner seed is not specified
// (equals 0), use global generator generated seed.
auto gen_cuda = paddle::framework::DefaultCUDAGenerator(device_id);
seed = static_cast<int>(gen_cuda->Random64());
} else if (seed == 0) {
// use random generated seed
std::random_device rd;
seed = rd();
} // else use `ctx.Attr<int>("seed")` specified seed
}
// else use `ctx.Attr<int>("seed")` specified seed
}
const T *x_data = x.data<T>();
......
......@@ -90,34 +90,25 @@ void TruncatedGaussianRandomKernel(const Context& dev_ctx,
int seed,
DataType dtype,
DenseTensor* out) {
auto tensor = out;
T* data = dev_ctx.template Alloc<T>(tensor);
bool seed_flag = false;
if (seed == 0) {
std::random_device rd;
seed = rd();
seed_flag = true;
}
T* data = dev_ctx.template Alloc<T>(out);
thrust::counting_iterator<int64_t> index_sequence_begin(0);
int64_t size = tensor->numel();
int64_t size = out->numel();
auto gen_cuda = dev_ctx.GetGenerator();
if (gen_cuda->GetIsInitPy() && seed_flag) {
if (seed == 0) {
// use global Generator seed
auto seed_offset = gen_cuda->IncrementOffset(1);
int64_t gen_offset = size * seed_offset.second;
thrust::transform(index_sequence_begin,
uint64_t seed = seed_offset.first;
uint64_t offset = seed_offset.second;
thrust::transform(
index_sequence_begin,
index_sequence_begin + size,
thrust::device_ptr<T>(data),
TruncatedNormalOffset<T>(mean,
std,
std::numeric_limits<T>::min(),
seed_offset.first,
gen_offset));
TruncatedNormalOffset<T>(
mean, std, std::numeric_limits<T>::min(), seed, size * offset));
} else {
// use OP seed
thrust::transform(
index_sequence_begin,
index_sequence_begin + size,
......
......@@ -65,22 +65,15 @@ void UniformRandomRawKernel(const Context& dev_ctx,
float diag_val,
DenseTensor* out) {
out->Resize(phi::make_ddim(shape.GetData()));
T* data = dev_ctx.template Alloc<T>(out);
auto size = out->numel();
bool seed_flag = false;
dev_ctx.template Alloc<T>(out);
if (seed == 0) {
std::random_device rd;
seed = rd();
seed_flag = true;
}
auto generator = dev_ctx.GetGenerator();
if (generator->GetIsInitPy() && seed_flag) {
// Use global Generator seed
using MT = typename kps::details::MPTypeTrait<T>::Type;
funcs::uniform_distribution<MT> dist;
funcs::uniform_real_transform<MT> trans(min, max);
funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);
} else {
// Use OP seed
auto func =
UniformGenerator<T>(min, max, seed, diag_num, diag_step, diag_val);
IndexKernel<T, UniformGenerator<T>>(dev_ctx, out, func);
......
......@@ -25,6 +25,8 @@ import paddle
import paddle.fluid.core as core
@unittest.skipIf(not core.is_compiled_with_cuda(),
"Only test cuda Random Generator")
class TestGeneratorSeed(unittest.TestCase):
"""
Test cases for cpu generator seed.
......@@ -70,15 +72,13 @@ class TestGeneratorSeed(unittest.TestCase):
"""Test Generator seed."""
fluid.enable_dygraph()
paddle.seed(12312321111)
x = fluid.layers.gaussian_random([120], dtype="float32")
st1 = paddle.get_cuda_rng_state()
x1 = fluid.layers.gaussian_random([120], dtype="float32")
paddle.set_cuda_rng_state(st1)
x2 = fluid.layers.gaussian_random([120], dtype="float32")
paddle.seed(12312321111)
x3 = fluid.layers.gaussian_random([120], dtype="float32")
x_np = x.numpy()
st = paddle.get_cuda_rng_state()
x1 = paddle.randn([120], dtype="float32")
paddle.set_cuda_rng_state(st)
x2 = paddle.randn([120], dtype="float32")
paddle.set_cuda_rng_state(st)
x3 = paddle.randn([120], dtype="float32")
x1_np = x1.numpy()
x2_np = x2.numpy()
x3_np = x3.numpy()
......@@ -86,7 +86,7 @@ class TestGeneratorSeed(unittest.TestCase):
if core.is_compiled_with_cuda():
print(">>>>>>> gaussian random dygraph >>>>>>>")
self.assertTrue(np.allclose(x1_np, x2_np))
self.assertTrue(np.allclose(x_np, x3_np))
self.assertTrue(np.allclose(x2_np, x3_np))
def test_generator_randint_dygraph(self):
"""Test Generator seed."""
......
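Distilled to a standalone sketch, the updated test exercises the CUDA RNG state API directly (requires a CUDA build):

    import numpy as np
    import paddle

    paddle.seed(12312321111)
    st = paddle.get_cuda_rng_state()
    x1 = paddle.randn([120], dtype="float32")
    paddle.set_cuda_rng_state(st)
    x2 = paddle.randn([120], dtype="float32")
    np.testing.assert_allclose(x1.numpy(), x2.numpy())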
......@@ -629,7 +629,6 @@ class ModuleApiTest(unittest.TestCase):
else:
fluid.disable_dygraph()
gen = paddle.seed(self._random_seed)
gen._is_init_py = False
paddle.framework.random._manual_program_seed(self._random_seed)
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
......
......@@ -178,7 +178,6 @@ class TestUniformRandomOpAPISeed(unittest.TestCase):
def test_attr_tensor_API(self):
_seed = 10
gen = paddle.seed(_seed)
gen._is_init_py = False
startup_program = fluid.Program()
train_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
......
......@@ -370,7 +370,6 @@ class TestUniformRandomOp_API_seed(unittest.TestCase):
def test_attr_tensor_API(self):
_seed = 10
gen = paddle.seed(_seed)
gen._is_init_py = False
startup_program = fluid.Program()
train_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
......
......@@ -44,10 +44,8 @@ def seed(seed):
if core.is_compiled_with_cuda():
for i in range(core.get_cuda_device_count()):
core.default_cuda_generator(i)._is_init_py = True
core.default_cuda_generator(i).manual_seed(seed)
core.default_cpu_generator()._is_init_py = True
return core.default_cpu_generator().manual_seed(seed)
......
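After this simplification, paddle.seed() just seeds every generator directly, with no _is_init_py bookkeeping. A quick check of the resulting behaviour (assuming a build with this PR):

    import paddle
    import paddle.fluid.core as core

    gen = paddle.seed(2021)   # returns the default CPU generator
    assert core.default_cpu_generator().initial_seed() == 2021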