From 380bc4e6947f04b25ea071a26815b674b5cc694c Mon Sep 17 00:00:00 2001 From: Haohongxiang <86215757+haohongxiang@users.noreply.github.com> Date: Thu, 15 Jul 2021 21:43:02 -0500 Subject: [PATCH] Fix gather_op to avoid cudaErrorLaunchFailure for solov2, test=develop (#34200) --- paddle/fluid/operators/gather.cu.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/operators/gather.cu.h b/paddle/fluid/operators/gather.cu.h index 6469307bc56..3b5ba1cf566 100644 --- a/paddle/fluid/operators/gather.cu.h +++ b/paddle/fluid/operators/gather.cu.h @@ -30,20 +30,13 @@ using platform::DeviceContext; template <typename T, typename IndexT = int> __global__ void GatherCUDAKernel(const T* params, const IndexT* indices, - T* output, size_t input_size, - size_t index_size, size_t slice_size) { + T* output, size_t index_size, + size_t slice_size) { CUDA_KERNEL_LOOP(i, index_size * slice_size) { int indices_i = i / slice_size; int slice_i = i - indices_i * slice_size; // offset inside the slice IndexT gather_i = indices[indices_i]; IndexT params_i = gather_i * slice_size + slice_i; - PADDLE_ENFORCE( - gather_i >= 0 && gather_i < input_size, - "The index is out of bounds, " - "please check whether the dimensions of index and " - "input meet the requirements. 
It should " - "be less than [%d] and greater than or equal to 0, but received [%d]", - input_size, gather_i); *(output + i) = *(params + params_i); } } @@ -108,8 +101,6 @@ void GPUGather(const platform::DeviceContext& ctx, const Tensor& src, // slice size int slice_size = 1; for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; - // input size - int input_size = src_dims[0] * slice_size; const T* p_src = src.data<T>(); const IndexT* p_index = index.data<IndexT>(); @@ -122,7 +113,7 @@ void GPUGather(const platform::DeviceContext& ctx, const Tensor& src, GatherCUDAKernel<T, IndexT><<< grid, block, 0, reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream()>>>( - p_src, p_index, p_output, index_size, slice_size); + p_src, p_index, p_output, index_size, slice_size); } template -- GitLab