提交 0d7b1fd4 编写于 作者: Q qijun

fix bug in SelectedRowsAddTensorKernel

上级 ab5dc9fe
......@@ -89,7 +89,7 @@ __global__ void SelectedRowsAddTensorKernel(const T* selected_rows,
// Since index in rows of SelectedRows can be duplicate, we can not use
// tensor_out[index] += selected_rows[index]; Instead, we have to use
// AtomicAdd to avoid concurrent write error.
paddle::platform::CudaAtomicAdd(&tensor_out[index], selected_rows[index]);
paddle::platform::CudaAtomicAdd(tensor_out + index, selected_rows[index]);
}
}
} // namespace
......@@ -121,7 +121,7 @@ struct SelectedRowsAddTensor<platform::GPUPlace, T> {
int block_size = 256;
dim3 threads(block_size, 1);
dim3 grid(1, in1_height);
dim3 grid(1, in1_rows.size());
SelectedRowsAddTensorKernel<
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册