未验证 提交 5d729457 编写于 作者: Leo Chen 提交者: GitHub

Fix the race condition in cumsum operator (#42205)

* Fix the race condition in cumsum operator

* Optimize cumsum operator
上级 00ed8b57
......@@ -39,14 +39,12 @@ __device__ void BlockReverse(
int tx = threadIdx.x;
int offset = tx;
int in_index = src_base + offset;
if (offset >= valid_item) {
sh_mem[offset] = 0;
} else {
int sh_mem_index = BLOCK_SIZE - offset - 1;
T data = idata[in_index];
sh_mem[sh_mem_index] = data;
T src_data = 0;
int src_offset = BLOCK_SIZE - offset - 1;
if (src_offset < valid_item) {
src_data = idata[src_base + src_offset];
}
sh_mem[offset] = src_data;
__syncthreads();
int out_index = dst_base - offset;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册