未验证 提交 068d905e 编写于 作者: W wangchaochaohu 提交者: GitHub

Fix the shape selection for vectorization on CUDA

上级 a0b60716
......@@ -348,7 +348,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
} else {
size_t thread_nums = 1024;
size_t block_nums = (width + thread_nums - 1) / thread_nums;
- int vec_size = VectorizedSize<T>(dx_data);
+ int vec_size = VectorizedSize<T>(dout_data);
if (vec_size == 4 && width % 4 == 0) {
block_nums = (width / vec_size + thread_nums - 1) / thread_nums;
VecMatrixReduceLongWidth<T,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册