From 5d7294578434b26c80bc01a40266a22c54756c25 Mon Sep 17 00:00:00 2001 From: Leo Chen <39020268+leo0519@users.noreply.github.com> Date: Wed, 27 Apr 2022 17:28:08 +0800 Subject: [PATCH] Fix the race condition in cumsum operator (#42205) * Fix the race condition in cumsum operator * Optimize cumsum operator --- paddle/phi/kernels/gpu/cumsum_kernel.cu | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/paddle/phi/kernels/gpu/cumsum_kernel.cu b/paddle/phi/kernels/gpu/cumsum_kernel.cu index e04f2b5f876..13975ddd3ef 100644 --- a/paddle/phi/kernels/gpu/cumsum_kernel.cu +++ b/paddle/phi/kernels/gpu/cumsum_kernel.cu @@ -39,14 +39,12 @@ __device__ void BlockReverse( int tx = threadIdx.x; int offset = tx; - int in_index = src_base + offset; - if (offset >= valid_item) { - sh_mem[offset] = 0; - } else { - int sh_mem_index = BLOCK_SIZE - offset - 1; - T data = idata[in_index]; - sh_mem[sh_mem_index] = data; + T src_data = 0; + int src_offset = BLOCK_SIZE - offset - 1; + if (src_offset < valid_item) { + src_data = idata[src_base + src_offset]; } + sh_mem[offset] = src_data; __syncthreads(); int out_index = dst_base - offset; -- GitLab