未验证 提交 9aa39584 编写于 作者: Y yaoxuefeng 提交者: GitHub

Fix CUDA generator hard-coded offset step (#27027)

上级 c3b314c3
...@@ -71,11 +71,7 @@ class GPUGaussianRandomKernel : public framework::OpKernel<T> { ...@@ -71,11 +71,7 @@ class GPUGaussianRandomKernel : public framework::OpKernel<T> {
if (gen_cuda->GetIsInitPy() && seed_flag) { if (gen_cuda->GetIsInitPy() && seed_flag) {
auto seed_offset = gen_cuda->IncrementOffset(1); auto seed_offset = gen_cuda->IncrementOffset(1);
int offset_step = 100; int gen_offset = size * seed_offset.second;
// NOTE(xuefeng): Currently, we let offset step fixed to avoid
// unexpected results which may cause ut fail.
// we will fix this in future.
int gen_offset = offset_step * seed_offset.second;
thrust::transform( thrust::transform(
index_sequence_begin, index_sequence_begin + size, index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(data), thrust::device_ptr<T>(data),
...@@ -112,11 +108,7 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> { ...@@ -112,11 +108,7 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
if (gen_cuda->GetIsInitPy() && seed_flag) { if (gen_cuda->GetIsInitPy() && seed_flag) {
auto seed_offset = gen_cuda->IncrementOffset(1); auto seed_offset = gen_cuda->IncrementOffset(1);
int offset_step = 100; int gen_offset = size * seed_offset.second;
// NOTE(xuefeng): Currently, we let offset step fixed to avoid
// unexpected results which may cause ut fail.
// we will fix this in future.
int gen_offset = offset_step * seed_offset.second;
thrust::transform(index_sequence_begin, index_sequence_begin + size, thrust::transform(index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(data), thrust::device_ptr<T>(data),
GaussianGenerator<T>(mean, std, seed_offset.first, GaussianGenerator<T>(mean, std, seed_offset.first,
......
...@@ -103,11 +103,7 @@ class GPUTruncatedGaussianRandomKernel : public framework::OpKernel<T> { ...@@ -103,11 +103,7 @@ class GPUTruncatedGaussianRandomKernel : public framework::OpKernel<T> {
if (gen_cuda->GetIsInitPy() && seed_flag) { if (gen_cuda->GetIsInitPy() && seed_flag) {
auto seed_offset = gen_cuda->IncrementOffset(1); auto seed_offset = gen_cuda->IncrementOffset(1);
int offset_step = 100; int gen_offset = size * seed_offset.second;
// NOTE(xuefeng): Currently, we let offset step fixed to avoid
// unexpected results which may cause ut fail.
// we will fix this in future.
int gen_offset = offset_step * seed_offset.second;
thrust::transform( thrust::transform(
index_sequence_begin, index_sequence_begin + size, index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(data), thrust::device_ptr<T>(data),
......
...@@ -143,11 +143,7 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> { ...@@ -143,11 +143,7 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> {
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id); auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
if (gen_cuda->GetIsInitPy() && seed_flag) { if (gen_cuda->GetIsInitPy() && seed_flag) {
auto seed_offset = gen_cuda->IncrementOffset(1); auto seed_offset = gen_cuda->IncrementOffset(1);
int offset_step = 100; int gen_offset = size * seed_offset.second;
// NOTE(xuefeng): Currently, we let offset step fixed to avoid
// unexpected results which may cause ut fail.
// we will fix this in future.
int gen_offset = offset_step * seed_offset.second;
thrust::transform( thrust::transform(
index_sequence_begin, index_sequence_begin + size, index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(data), thrust::device_ptr<T>(data),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册