未验证 提交 81511469 编写于 作者: N niuliling123 提交者: GitHub

Fix a bug in VectorizedAtomicAddPerBlock (#55929)

上级 9f2d88e9
......@@ -711,7 +711,7 @@ __device__ __forceinline__ void VectorizedAtomicAddPerBlock(
using NVT = typename VecAtomicAddHelper<T>::NVT;
using NVVec2T = typename VecAtomicAddHelper<T>::NVVec2T;
bool aligned_half2 =
- (reinterpret_cast<std::uintptr_t>(out) % sizeof(NVT) == 0);
+ (reinterpret_cast<std::uintptr_t>(out) % sizeof(NVVec2T) == 0);
if (aligned_half2) {
for (i = tid * 2; i < loops; i += threads_per_block * 2) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册