Fix atomicAdd in grid sample op and affine grid op (#26647)

test=develop

Fix atomicAdd in grid sample op and affine grid op (#26647)
test=develop
f8238411 · whs · GitHub · 32ba8602 · f8238411 · f8238411
隐藏空白更改
内联并排

Showing 2 changed files with 7 additions and 7 deletions

paddle/fluid/operators/affine_grid_op.cu +6 -6

paddle/fluid/operators/grid_sampler_op.cu +1 -1

未找到文件。
--- a/paddle/fluid/operators/affine_grid_op.cu
+++ b/paddle/fluid/operators/affine_grid_op.cu
@@ -86,14 +86,14 @@ __global__ void affine_grid_grad_kernel(const int count, int n, int out_h,

    int theta_offset = n * 6;  // 2 * 3;
    T out_grad_x = out_grad[index * 2];
-    atomicAdd(theta_grad + theta_offset, out_grad_x * h_coor);
-    atomicAdd(theta_grad + theta_offset + 1, out_grad_x * w_coor);
-    atomicAdd(theta_grad + theta_offset + 2, out_grad_x);
+    platform::CudaAtomicAdd(theta_grad + theta_offset, out_grad_x * h_coor);
+    platform::CudaAtomicAdd(theta_grad + theta_offset + 1, out_grad_x * w_coor);
+    platform::CudaAtomicAdd(theta_grad + theta_offset + 2, out_grad_x);

    T out_grad_y = out_grad[index * 2 + 1];
-    atomicAdd(theta_grad + theta_offset + 3, out_grad_y * h_coor);
-    atomicAdd(theta_grad + theta_offset + 4, out_grad_y * w_coor);
-    atomicAdd(theta_grad + theta_offset + 5, out_grad_y);
+    platform::CudaAtomicAdd(theta_grad + theta_offset + 3, out_grad_y * h_coor);
+    platform::CudaAtomicAdd(theta_grad + theta_offset + 4, out_grad_y * w_coor);
+    platform::CudaAtomicAdd(theta_grad + theta_offset + 5, out_grad_y);
  }
 }


--- a/paddle/fluid/operators/grid_sampler_op.cu
+++ b/paddle/fluid/operators/grid_sampler_op.cu
@@ -31,7 +31,7 @@ static __forceinline__ __device__ void atomic_add(T* data, int h, int w, int sH,
                                                  int sW, int H, int W,
                                                  T delta) {
  if (in_bounds(h, w, H, W)) {
-    atomicAdd(data + h * sH + w * sW, delta);
+    platform::CudaAtomicAdd(data + h * sH + w * sW, delta);
  }
 }