Fix randperm out-of-bounds bug (#42057)

a6794926 · zhouweiwei2014 · GitHub · b20683c0 · a6794926
Hide whitespace changes
Inline | Side-by-side

Showing 1 changed file with 21 additions and 18 deletions

paddle/phi/kernels/gpu/randperm_kernel.cu paddle/phi/kernels/gpu/randperm_kernel.cu +21 -18

未找到文件。
--- a/paddle/phi/kernels/gpu/randperm_kernel.cu
+++ b/paddle/phi/kernels/gpu/randperm_kernel.cu
@@ -36,26 +36,29 @@ DECLARE_bool(use_curand);
 namespace phi {
-template <typename T>
+template <typename keyT, typename dataT>
-__global__ void SwapRepeatKernel(
+__global__ void SwapRepeatKernel(keyT* key_out_data,
-    int* key, T* data, int n, uint64_t seed, uint64_t offset) {
+                                 dataT* out_data,
+                                 int n,
+                                 uint64_t seed,
+                                 uint64_t offset) {
  size_t idx = static_cast<size_t>(blockIdx.x * blockDim.x + threadIdx.x);
-  if (idx < n) return;
+  if (idx >= n - 1) return;  // out of range
-  bool first_repeat = false;
+  bool is_first_repeat = false;
-  if (data[idx] == data[idx + 1]) {
+  if (key_out_data[idx] == key_out_data[idx + 1]) {
    if (idx == 0) {
-      first_repeat = true;
+      is_first_repeat = true;
-    } else if (data[idx] != data[idx - 1]) {
+    } else if (key_out_data[idx] != key_out_data[idx - 1]) {
-      first_repeat = true;
+      is_first_repeat = true;
    }
  }
-  if (!first_repeat) return;
+  if (!is_first_repeat) return;
  int repeat_size = 1;
  for (int i = idx; i < n; ++i) {
-    if (data[i] == data[i + 1]) {
+    if (key_out_data[i] == key_out_data[i + 1]) {
      ++repeat_size;
    } else {
      break;
@@ -74,9 +77,9 @@ __global__ void SwapRepeatKernel(
    uint32_t r = hiprand(&state) % (i + 1);
 #endif
    if (r != i) {
-      T tmp = data[idx + i];
+      dataT tmp = out_data[idx + i];
-      data[idx + i] = data[idx + r];
+      out_data[idx + i] = out_data[idx + r];
-      data[idx + r] = tmp;
+      out_data[idx + r] = tmp;
    }
  }
 }
@@ -138,10 +141,10 @@ void RandpermRawKernel(
  auto seed_offset = gen_cuda->IncrementOffset(n);
  auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, n);
-  SwapRepeatKernel<T><<<config.block_per_grid.x,
+  SwapRepeatKernel<<<config.block_per_grid.x,
-                        config.thread_per_block.x,
+                     config.thread_per_block.x,
-                        0,
+                     0,
-                        dev_ctx.stream()>>>(
+                     dev_ctx.stream()>>>(
      key_out.data<int>(), out_data, n, seed_offset.first, seed_offset.second);
 }