Unverified · Commit fb228c4a authored by RedContritio, committed by GitHub

Fix UFA illegal address access of case2: paddle.scatter (#50025)

* add dim check in scatter

* add check in scatter.cu

* add unittest

* remove unnecessary log and comment

---------

Co-authored-by: RedContritio <>
Parent e1a792fe
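For context, a minimal sketch of the failure mode this change guards against: the tensor values mirror the new unit test added below, and the IndexError is assumed to be the Python mapping of the new OutOfRange check on a non-CUDA build (as in that test).

import paddle

# Destination has 3 rows, so every scatter index must lie in [0, 3).
x = paddle.to_tensor([[1, 1], [2, 2], [3, 3]], dtype='float32')
updates = paddle.to_tensor([[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32')

# Index 5 is >= x.shape[0]; with this check it is rejected up front
# instead of reading/writing through an illegal address.
index = paddle.to_tensor([2, 1, 5, 1], dtype='int64')
try:
    paddle.scatter(x, index, updates)
except IndexError as e:
    print('out-of-range index rejected:', e)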
@@ -28,6 +28,7 @@ namespace funcs {
 template <typename T, typename IndexT = int>
 __global__ void ScatterInitCUDAKernel(const IndexT* indices,
                                       T* output,
+                                      size_t output_count,
                                       size_t index_size,
                                       size_t slice_size) {
   CUDA_KERNEL_LOOP_TYPE(i, index_size * slice_size, int64_t) {
@@ -35,11 +36,13 @@ __global__ void ScatterInitCUDAKernel(const IndexT* indices,
     int64_t slice_i = i - indices_i * slice_size;  // offset inside the slice
     IndexT scatter_i = indices[indices_i];

-    PADDLE_ENFORCE(scatter_i >= 0,
-                   "The index is out of bounds, "
-                   "please check whether the dimensions of index and "
-                   "input meet the requirements. It should "
-                   "be greater than or equal to 0, but received [%d]",
-                   scatter_i);
+    PADDLE_ENFORCE(
+        scatter_i >= 0 && scatter_i < output_count,
+        "The index is out of bounds, "
+        "please check whether the dimensions of index and "
+        "input meet the requirements. It should "
+        "be less than [%d] and greater or equal to 0, but received [%d]",
+        output_count,
+        scatter_i);

     int64_t out_i = scatter_i * slice_size + slice_i;
@@ -51,6 +54,7 @@ template <typename T, typename IndexT = int>
 __global__ void ScatterCUDAKernel(const T* params,
                                   const IndexT* indices,
                                   T* output,
+                                  size_t output_count,
                                   size_t index_size,
                                   size_t slice_size,
                                   bool overwrite) {
@@ -59,11 +63,13 @@ __global__ void ScatterCUDAKernel(const T* params,
     int64_t slice_i = i - indices_i * slice_size;  // offset inside the slice
     IndexT scatter_i = indices[indices_i];

-    PADDLE_ENFORCE(scatter_i >= 0,
-                   "The index is out of bounds, "
-                   "please check whether the dimensions of index and "
-                   "input meet the requirements. It should "
-                   "be greater than or equal to 0, but received [%d]",
-                   scatter_i);
+    PADDLE_ENFORCE(
+        scatter_i >= 0 && scatter_i < output_count,
+        "The index is out of bounds, "
+        "please check whether the dimensions of index and "
+        "input meet the requirements. It should "
+        "be less than [%d] and greater or equal to 0, but received [%d]",
+        output_count,
+        scatter_i);

     int64_t out_i = scatter_i * slice_size + slice_i;
@@ -143,8 +149,7 @@ void GPUScatterAssign(const phi::GPUContext& ctx,
   int64_t index_size = index.dims().size() == 0 ? 1 : index.dims()[0];

   auto src_dims = src.dims();
-  phi::DDim output_dims(src_dims);
-  output_dims[0] = index_size;
+  phi::DDim output_dims = output->dims();

   // slice size
   size_t slice_size = 1;
@@ -169,11 +174,16 @@ void GPUScatterAssign(const phi::GPUContext& ctx,
   // if not overwrite mode, init data
   if (!overwrite) {
     ScatterInitCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
-        p_index, p_output, index_size, slice_size);
+        p_index, p_output, output_dims[0], index_size, slice_size);
   }

-  ScatterCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
-      p_src, p_index, p_output, index_size, slice_size, overwrite);
+  ScatterCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(p_src,
+                                                                 p_index,
+                                                                 p_output,
+                                                                 output_dims[0],
+                                                                 index_size,
+                                                                 slice_size,
+                                                                 overwrite);
 }
@@ -203,7 +213,7 @@ void GPUScatterGradForX(const phi::GPUContext& ctx,
   phi::backends::gpu::LimitGridDim(ctx, &grid);
   ScatterInitCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
-      p_index, p_output, index_size, slice_size);
+      p_index, p_output, dst_dims[0], index_size, slice_size);
 }

 template <typename T, typename IndexT = int>
......
@@ -99,6 +99,7 @@ void ScatterAssign(const phi::CPUContext& ctx,
   auto dst_dims = output->dims();

   const T* p_src = src.data<T>();
+  // IndexT is int32 or int64, so direct compare is allowed.
   const IndexT* p_index = index.data<IndexT>();
   T* p_output = output->data<T>();
@@ -140,6 +141,17 @@ void ScatterAssign(const phi::CPUContext& ctx,
                        "be greater than or equal to 0, but received [%d]",
                        index_));

+    PADDLE_ENFORCE_LT(
+        index_,
+        dst_dims[0],
+        phi::errors::OutOfRange(
+            "The index is out of bounds, "
+            "please check whether the values of index and "
+            "dimensions of input meet the requirements. each index should "
+            "be less than 1st-dim size (%d) of input, but received [%d]",
+            dst_dims[0],
+            index_));
+
     memcpy(p_output + index_ * slice_size, p_src + i * slice_size, slice_bytes);
   }
 }
......
@@ -376,6 +376,32 @@ class TestScatterInplaceAPI(TestScatterAPI):
         self.scatter = paddle.scatter_


+@unittest.skipIf(core.is_compiled_with_cuda(), "CUDA will not throw exception")
+class TestScatterError(unittest.TestCase):
+    def test_scatter_index(self):
+        paddle.disable_static()
+        x = paddle.to_tensor([[1, 1], [2, 2], [3, 3]], dtype='float32')
+
+        def test_neg_index():
+            index = paddle.to_tensor([2, 1, -1, 1], dtype='int64')
+            updates = paddle.to_tensor(
+                [[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32'
+            )
+            out = paddle.scatter(x, index, updates)
+
+        self.assertRaises(IndexError, test_neg_index)
+
+        def test_too_big_index():
+            index = paddle.to_tensor([2, 1, 5, 1], dtype='int64')
+            updates = paddle.to_tensor(
+                [[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32'
+            )
+            out = paddle.scatter(x, index, updates)
+
+        self.assertRaises(IndexError, test_too_big_index)
+
+        paddle.enable_static()
+
+
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()