From 0d7b1fd4feed8d5249c6904f10126c7d58319503 Mon Sep 17 00:00:00 2001
From: qijun <qijun1994@hotmail.com>
Date: Mon, 16 Oct 2017 08:28:03 +0000
Subject: [PATCH] fix bug in SelectedRowsAddTensorKernel

---
 paddle/operators/math/selected_rows_functor.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/paddle/operators/math/selected_rows_functor.cu b/paddle/operators/math/selected_rows_functor.cu
index a406bef39..ea149ebbc 100644
--- a/paddle/operators/math/selected_rows_functor.cu
+++ b/paddle/operators/math/selected_rows_functor.cu
@@ -89,7 +89,7 @@ __global__ void SelectedRowsAddTensorKernel(const T* selected_rows,
     // Since index in rows of SelectedRows can be duplicate, we can not use
     // tensor_out[index] += selected_rows[index]; Instead, we have to use
     // AtomicAdd to avoid concurrent write error.
-    paddle::platform::CudaAtomicAdd(&tensor_out[index], selected_rows[index]);
+    paddle::platform::CudaAtomicAdd(tensor_out + index, selected_rows[index]);
   }
 }
 }  // namespace
@@ -121,7 +121,7 @@ struct SelectedRowsAddTensor<platform::GPUPlace, T> {
 
     int block_size = 256;
     dim3 threads(block_size, 1);
-    dim3 grid(1, in1_height);
+    dim3 grid(1, in1_rows.size());
     SelectedRowsAddTensorKernel<
         T><<<grid, threads, 0,
              reinterpret_cast<const platform::CUDADeviceContext&>(context)
-- 
GitLab