未验证 提交 0c82e3a0 编写于 作者: Y Yang 提交者: GitHub

Fix integer overflow in some CUDA ops (#37670)

上级 b0dff05d
......@@ -61,16 +61,16 @@ class BernoulliOpKernel<platform::CUDADeviceContext, T>
BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()).GetDeviceId();
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
auto seed_offset = gen_cuda->IncrementOffset(1);
int gen_offset = size * seed_offset.second;
int64_t gen_offset = size * seed_offset.second;
platform::Transform<platform::CUDADeviceContext> trans;
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
thrust::counting_iterator<int64_t> index_sequence_begin(0);
auto* context =
static_cast<const platform::CUDADeviceContext*>(&ctx.device_context());
trans(*context, index_sequence_begin, index_sequence_begin + size, in_data,
out_data,
BernoulliCudaFunctor<T>(static_cast<unsigned int>(seed_offset.first),
static_cast<unsigned int>(gen_offset)));
BernoulliCudaFunctor<T>(static_cast<int64_t>(seed_offset.first),
static_cast<int64_t>(gen_offset)));
}
};
......
......@@ -59,7 +59,7 @@ class GPUGaussianRandomKernel : public framework::OpKernel<T> {
}
T mean = static_cast<T>(context.Attr<float>("mean"));
T std = static_cast<T>(context.Attr<float>("std"));
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
thrust::counting_iterator<int64_t> index_sequence_begin(0);
auto shape = GetShape(context);
tensor->Resize(shape);
T* data = tensor->mutable_data<T>(context.GetPlace());
......@@ -72,7 +72,7 @@ class GPUGaussianRandomKernel : public framework::OpKernel<T> {
if (gen_cuda->GetIsInitPy() && seed_flag) {
auto seed_offset = gen_cuda->IncrementOffset(1);
int gen_offset = size * seed_offset.second;
int64_t gen_offset = size * seed_offset.second;
thrust::transform(
index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(data),
......@@ -100,7 +100,7 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
}
T mean = static_cast<T>(context.Attr<float>("mean"));
T std = static_cast<T>(context.Attr<float>("std"));
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
thrust::counting_iterator<int64_t> index_sequence_begin(0);
int64_t size = tensor->numel();
int device_id =
......@@ -109,7 +109,7 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
if (gen_cuda->GetIsInitPy() && seed_flag) {
auto seed_offset = gen_cuda->IncrementOffset(1);
int gen_offset = size * seed_offset.second;
int64_t gen_offset = size * seed_offset.second;
thrust::transform(index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(data),
GaussianGenerator<T>(mean, std, seed_offset.first,
......
......@@ -129,7 +129,7 @@ struct GumbleNoiseGenerator<platform::CUDADeviceContext, T> {
int64_t size = size_to_axis * size_from_axis;
T* random_data =
random_tensor.mutable_data<T>({size}, platform::CUDAPlace());
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
thrust::counting_iterator<int64_t> index_sequence_begin(0);
// generate gumbel noise
int device_id =
......@@ -137,7 +137,7 @@ struct GumbleNoiseGenerator<platform::CUDADeviceContext, T> {
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
if (gen_cuda->GetIsInitPy()) {
auto seed_offset = gen_cuda->IncrementOffset(1);
int gen_offset = size * seed_offset.second;
int64_t gen_offset = size * seed_offset.second;
thrust::transform(
index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(random_data),
......
......@@ -239,7 +239,7 @@ class MultinomialOpKernel<platform::CUDADeviceContext, T>
auto* rng_data = rng_data_tensor.mutable_data<T>(
{num_distributions, num_samples}, ctx.GetPlace());
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
thrust::counting_iterator<int64_t> index_sequence_begin(0);
platform::Transform<platform::CUDADeviceContext> trans;
auto* context =
static_cast<const platform::CUDADeviceContext*>(&ctx.device_context());
......
......@@ -97,7 +97,7 @@ class GPUTruncatedGaussianRandomKernel : public framework::OpKernel<T> {
}
T mean = static_cast<T>(context.Attr<float>("mean"));
T std = static_cast<T>(context.Attr<float>("std"));
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
thrust::counting_iterator<int64_t> index_sequence_begin(0);
int64_t size = tensor->numel();
int device_id =
......@@ -106,7 +106,7 @@ class GPUTruncatedGaussianRandomKernel : public framework::OpKernel<T> {
if (gen_cuda->GetIsInitPy() && seed_flag) {
auto seed_offset = gen_cuda->IncrementOffset(1);
int gen_offset = size * seed_offset.second;
int64_t gen_offset = size * seed_offset.second;
thrust::transform(
index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(data),
......
......@@ -118,14 +118,14 @@ class GPUUniformRandomInplaceKernel : public framework::OpKernel<T> {
unsigned int diag_step =
static_cast<unsigned int>(ctx.Attr<int>("diag_step"));
T diag_val = static_cast<T>(ctx.Attr<float>("diag_val"));
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
thrust::counting_iterator<int64_t> index_sequence_begin(0);
int64_t size = tensor->numel();
int device_id =
BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()).GetDeviceId();
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
if (gen_cuda->GetIsInitPy() && seed_flag) {
auto seed_offset = gen_cuda->IncrementOffset(1);
int gen_offset = size * seed_offset.second;
int64_t gen_offset = size * seed_offset.second;
thrust::transform(
index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(data),
......
......@@ -139,14 +139,14 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> {
unsigned int diag_step =
static_cast<unsigned int>(context.Attr<int>("diag_step"));
T diag_val = static_cast<T>(context.Attr<float>("diag_val"));
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
thrust::counting_iterator<int64_t> index_sequence_begin(0);
int64_t size = tensor->numel();
int device_id =
BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace()).GetDeviceId();
auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
if (gen_cuda->GetIsInitPy() && seed_flag) {
auto seed_offset = gen_cuda->IncrementOffset(1);
int gen_offset = size * seed_offset.second;
int64_t gen_offset = size * seed_offset.second;
thrust::transform(
index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(data),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册