未验证 提交 fb228c4a 编写于 作者: R RedContritio 提交者: GitHub

Fix UFA illegal address access (UFA非法地址访问), case 2: paddle.scatter (#50025)

* add dim check in scatter

* add check in scatter.cu

* add unittest

* remove unnecessary log and comment

---------

Co-authored-by: RedContritio <>
上级 e1a792fe
......@@ -28,6 +28,7 @@ namespace funcs {
template <typename T, typename IndexT = int>
__global__ void ScatterInitCUDAKernel(const IndexT* indices,
T* output,
size_t output_count,
size_t index_size,
size_t slice_size) {
CUDA_KERNEL_LOOP_TYPE(i, index_size * slice_size, int64_t) {
......@@ -35,12 +36,14 @@ __global__ void ScatterInitCUDAKernel(const IndexT* indices,
int64_t slice_i = i - indices_i * slice_size; // offset inside the slice
IndexT scatter_i = indices[indices_i];
PADDLE_ENFORCE(scatter_i >= 0,
"The index is out of bounds, "
"please check whether the dimensions of index and "
"input meet the requirements. It should "
"be greater than or equal to 0, but received [%d]",
scatter_i);
PADDLE_ENFORCE(
scatter_i >= 0 && scatter_i < output_count,
"The index is out of bounds, "
"please check whether the dimensions of index and "
"input meet the requirements. It should "
"be less than [%d] and greater or equal to 0, but received [%d]",
output_count,
scatter_i);
int64_t out_i = scatter_i * slice_size + slice_i;
*(output + out_i) = static_cast<T>(0);
......@@ -51,6 +54,7 @@ template <typename T, typename IndexT = int>
__global__ void ScatterCUDAKernel(const T* params,
const IndexT* indices,
T* output,
size_t output_count,
size_t index_size,
size_t slice_size,
bool overwrite) {
......@@ -59,12 +63,14 @@ __global__ void ScatterCUDAKernel(const T* params,
int64_t slice_i = i - indices_i * slice_size; // offset inside the slice
IndexT scatter_i = indices[indices_i];
PADDLE_ENFORCE(scatter_i >= 0,
"The index is out of bounds, "
"please check whether the dimensions of index and "
"input meet the requirements. It should "
"be greater than or equal to 0, but received [%d]",
scatter_i);
PADDLE_ENFORCE(
scatter_i >= 0 && scatter_i < output_count,
"The index is out of bounds, "
"please check whether the dimensions of index and "
"input meet the requirements. It should "
"be less than [%d] and greater or equal to 0, but received [%d]",
output_count,
scatter_i);
int64_t out_i = scatter_i * slice_size + slice_i;
if (overwrite) {
......@@ -143,8 +149,7 @@ void GPUScatterAssign(const phi::GPUContext& ctx,
int64_t index_size = index.dims().size() == 0 ? 1 : index.dims()[0];
auto src_dims = src.dims();
phi::DDim output_dims(src_dims);
output_dims[0] = index_size;
phi::DDim output_dims = output->dims();
// slice size
size_t slice_size = 1;
......@@ -169,11 +174,16 @@ void GPUScatterAssign(const phi::GPUContext& ctx,
// if not overwrite mode, init data
if (!overwrite) {
ScatterInitCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
p_index, p_output, index_size, slice_size);
p_index, p_output, output_dims[0], index_size, slice_size);
}
ScatterCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
p_src, p_index, p_output, index_size, slice_size, overwrite);
ScatterCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(p_src,
p_index,
p_output,
output_dims[0],
index_size,
slice_size,
overwrite);
}
// The function is only for scatter grad x,
......@@ -203,7 +213,7 @@ void GPUScatterGradForX(const phi::GPUContext& ctx,
phi::backends::gpu::LimitGridDim(ctx, &grid);
ScatterInitCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
p_index, p_output, index_size, slice_size);
p_index, p_output, dst_dims[0], index_size, slice_size);
}
template <typename T, typename IndexT = int>
......
......@@ -99,6 +99,7 @@ void ScatterAssign(const phi::CPUContext& ctx,
auto dst_dims = output->dims();
const T* p_src = src.data<T>();
// IndexT is int32 or int64, so direct compare is allowed.
const IndexT* p_index = index.data<IndexT>();
T* p_output = output->data<T>();
......@@ -140,6 +141,17 @@ void ScatterAssign(const phi::CPUContext& ctx,
"be greater than or equal to 0, but received [%d]",
index_));
PADDLE_ENFORCE_LT(
index_,
dst_dims[0],
phi::errors::OutOfRange(
"The index is out of bounds, "
"please check whether the values of index and "
"dimensions of input meet the requirements. each index should "
"be less than 1st-dim size (%d) of input, but received [%d]",
dst_dims[0],
index_));
memcpy(p_output + index_ * slice_size, p_src + i * slice_size, slice_bytes);
}
}
......
......@@ -376,6 +376,32 @@ class TestScatterInplaceAPI(TestScatterAPI):
self.scatter = paddle.scatter_
@unittest.skipIf(core.is_compiled_with_cuda(), "CUDA will not throw exception")
class TestScatterError(unittest.TestCase):
    """Checks that paddle.scatter rejects out-of-range index values.

    The whole class is skipped on CUDA builds (see the skip reason on the
    decorator), so these assertions only run on builds without CUDA.
    """

    def test_scatter_index(self):
        """A negative index and an index past x's first dimension must both
        raise IndexError."""
        paddle.disable_static()
        # Restore static mode even when an assertion fails; otherwise the
        # dynamic-mode switch would leak into tests that run afterwards.
        try:
            # x has 3 rows, so the valid index values are 0, 1 and 2.
            x = paddle.to_tensor([[1, 1], [2, 2], [3, 3]], dtype='float32')

            def test_neg_index():
                # -1 is below the valid range.
                index = paddle.to_tensor([2, 1, -1, 1], dtype='int64')
                updates = paddle.to_tensor(
                    [[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32'
                )
                paddle.scatter(x, index, updates)

            self.assertRaises(IndexError, test_neg_index)

            def test_too_big_index():
                # 5 is past the last row of x.
                index = paddle.to_tensor([2, 1, 5, 1], dtype='int64')
                updates = paddle.to_tensor(
                    [[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32'
                )
                paddle.scatter(x, index, updates)

            self.assertRaises(IndexError, test_too_big_index)
        finally:
            paddle.enable_static()
if __name__ == "__main__":
    # Enable static mode before handing control to the unittest runner.
    paddle.enable_static()
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册