提交 0d7b1fd4 编写于 作者: Q qijun

fix bug in SelectedRowsAddTensorKernel

上级 ab5dc9fe
...@@ -89,7 +89,7 @@ __global__ void SelectedRowsAddTensorKernel(const T* selected_rows, ...@@ -89,7 +89,7 @@ __global__ void SelectedRowsAddTensorKernel(const T* selected_rows,
// Since indices in the rows of SelectedRows can be duplicated, we cannot use // Since indices in the rows of SelectedRows can be duplicated, we cannot use
// tensor_out[index] += selected_rows[index]; instead, we have to use // tensor_out[index] += selected_rows[index]; instead, we have to use
// AtomicAdd to avoid concurrent write errors. // AtomicAdd to avoid concurrent write errors.
paddle::platform::CudaAtomicAdd(&tensor_out[index], selected_rows[index]); paddle::platform::CudaAtomicAdd(tensor_out + index, selected_rows[index]);
} }
} }
} // namespace } // namespace
...@@ -121,7 +121,7 @@ struct SelectedRowsAddTensor<platform::GPUPlace, T> { ...@@ -121,7 +121,7 @@ struct SelectedRowsAddTensor<platform::GPUPlace, T> {
int block_size = 256; int block_size = 256;
dim3 threads(block_size, 1); dim3 threads(block_size, 1);
dim3 grid(1, in1_height); dim3 grid(1, in1_rows.size());
SelectedRowsAddTensorKernel< SelectedRowsAddTensorKernel<
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册