Fix UFA非法地址访问(UFA illegal address access) of case2: paddle.scatter (#50025)

* add dim check in scatter * add check in scatter.cu * add unittest * remove unnecessary log and comment --------- Co-authored-by: RedContritio <>

Fix UFA非法地址访问(UFA illegal address access) of case2: paddle.scatter (#50025)
* add dim check in scatter * add check in scatter.cu * add unittest * remove unnecessary log and comment --------- Co-authored-by: RedContritio <>
fb228c4a · RedContritio · GitHub · e1a792fe · fb228c4a · fb228c4a
3 changed file
--- a/paddle/phi/kernels/funcs/scatter.cu.h
+++ b/paddle/phi/kernels/funcs/scatter.cu.h
@@ -28,6 +28,7 @@ namespace funcs {
 template <typename T, typename IndexT = int>
 __global__ void ScatterInitCUDAKernel(const IndexT* indices,
                                      T* output,
+                                      size_t output_count,
                                      size_t index_size,
                                      size_t slice_size) {
  CUDA_KERNEL_LOOP_TYPE(i, index_size * slice_size, int64_t) {
@@ -35,12 +36,14 @@ __global__ void ScatterInitCUDAKernel(const IndexT* indices,
    int64_t slice_i = i - indices_i * slice_size;  // offset inside the slice
    IndexT scatter_i = indices[indices_i];
-    PADDLE_ENFORCE(scatter_i >= 0,
+    PADDLE_ENFORCE(
-                   "The index is out of bounds, "
+        scatter_i >= 0 && scatter_i < output_count,
-                   "please check whether the dimensions of index and "
+        "The index is out of bounds, "
-                   "input meet the requirements. It should "
+        "please check whether the dimensions of index and "
-                   "be greater than or equal to 0, but received [%d]",
+        "input meet the requirements. It should "
-                   scatter_i);
+        "be less than [%d] and greater or equal to 0, but received [%d]",
+        output_count,
+        scatter_i);
    int64_t out_i = scatter_i * slice_size + slice_i;
    *(output + out_i) = static_cast<T>(0);
@@ -51,6 +54,7 @@ template <typename T, typename IndexT = int>
 __global__ void ScatterCUDAKernel(const T* params,
                                  const IndexT* indices,
                                  T* output,
+                                  size_t output_count,
                                  size_t index_size,
                                  size_t slice_size,
                                  bool overwrite) {
@@ -59,12 +63,14 @@ __global__ void ScatterCUDAKernel(const T* params,
    int64_t slice_i = i - indices_i * slice_size;  // offset inside the slice
    IndexT scatter_i = indices[indices_i];
-    PADDLE_ENFORCE(scatter_i >= 0,
+    PADDLE_ENFORCE(
-                   "The index is out of bounds, "
+        scatter_i >= 0 && scatter_i < output_count,
-                   "please check whether the dimensions of index and "
+        "The index is out of bounds, "
-                   "input meet the requirements. It should "
+        "please check whether the dimensions of index and "
-                   "be greater than or equal to 0, but received [%d]",
+        "input meet the requirements. It should "
-                   scatter_i);
+        "be less than [%d] and greater or equal to 0, but received [%d]",
+        output_count,
+        scatter_i);
    int64_t out_i = scatter_i * slice_size + slice_i;
    if (overwrite) {
@@ -143,8 +149,7 @@ void GPUScatterAssign(const phi::GPUContext& ctx,
  int64_t index_size = index.dims().size() == 0 ? 1 : index.dims()[0];
  auto src_dims = src.dims();
-  phi::DDim output_dims(src_dims);
+  phi::DDim output_dims = output->dims();
-  output_dims[0] = index_size;
  // slice size
  size_t slice_size = 1;
@@ -169,11 +174,16 @@ void GPUScatterAssign(const phi::GPUContext& ctx,
  // if not overwrite mode, init data
  if (!overwrite) {
    ScatterInitCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
-        p_index, p_output, index_size, slice_size);
+        p_index, p_output, output_dims[0], index_size, slice_size);
  }
-  ScatterCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
+  ScatterCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(p_src,
-      p_src, p_index, p_output, index_size, slice_size, overwrite);
+                                                                 p_index,
+                                                                 p_output,
+                                                                 output_dims[0],
+                                                                 index_size,
+                                                                 slice_size,
+                                                                 overwrite);
 }
 // The function is only for scatter grad x,
@@ -203,7 +213,7 @@ void GPUScatterGradForX(const phi::GPUContext& ctx,
  phi::backends::gpu::LimitGridDim(ctx, &grid);
  ScatterInitCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
-      p_index, p_output, index_size, slice_size);
+      p_index, p_output, dst_dims[0], index_size, slice_size);
 }
 template <typename T, typename IndexT = int>

--- a/paddle/phi/kernels/funcs/scatter.h
+++ b/paddle/phi/kernels/funcs/scatter.h
@@ -99,6 +99,7 @@ void ScatterAssign(const phi::CPUContext& ctx,
  auto dst_dims = output->dims();
  const T* p_src = src.data<T>();
+  // IndexT is int32 or int64, so direct compare is allowed.
  const IndexT* p_index = index.data<IndexT>();
  T* p_output = output->data<T>();
@@ -140,6 +141,17 @@ void ScatterAssign(const phi::CPUContext& ctx,
                          "be greater than or equal to 0, but received [%d]",
                          index_));
+    PADDLE_ENFORCE_LT(
+        index_,
+        dst_dims[0],
+        phi::errors::OutOfRange(
+            "The index is out of bounds, "
+            "please check whether the values of index and "
+            "dimensions of input meet the requirements. each index should "
+            "be less than 1st-dim size (%d) of input, but received [%d]",
+            dst_dims[0],
+            index_));
    memcpy(p_output + index_ * slice_size, p_src + i * slice_size, slice_bytes);
  }
 }

--- a/python/paddle/fluid/tests/unittests/test_scatter_op.py
+++ b/python/paddle/fluid/tests/unittests/test_scatter_op.py
@@ -376,6 +376,32 @@ class TestScatterInplaceAPI(TestScatterAPI):
        self.scatter = paddle.scatter_
+@unittest.skipIf(core.is_compiled_with_cuda(), "CUDA will not throw exception")
+class TestScatterError(unittest.TestCase):
+    def test_scatter_index(self):
+        paddle.disable_static()
+        x = paddle.to_tensor([[1, 1], [2, 2], [3, 3]], dtype='float32')
+        def test_neg_index():
+            index = paddle.to_tensor([2, 1, -1, 1], dtype='int64')
+            updates = paddle.to_tensor(
+                [[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32'
+            )
+            out = paddle.scatter(x, index, updates)
+        self.assertRaises(IndexError, test_neg_index)
+        def test_too_big_index():
+            index = paddle.to_tensor([2, 1, 5, 1], dtype='int64')
+            updates = paddle.to_tensor(
+                [[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32'
+            )
+            out = paddle.scatter(x, index, updates)
+        self.assertRaises(IndexError, test_too_big_index)
+        paddle.enable_static()
 if __name__ == "__main__":
    paddle.enable_static()
    unittest.main()