From d128c28678676d36701dfa07f89b8e2f096dfca8 Mon Sep 17 00:00:00 2001
From: sunli
Date: Wed, 7 Jul 2021 10:32:11 +0800
Subject: [PATCH] optimize index computation of roll (#33909)

---
 paddle/fluid/operators/roll_op.cu | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/operators/roll_op.cu b/paddle/fluid/operators/roll_op.cu
index ce93c5f984e..34d4d67e39d 100644
--- a/paddle/fluid/operators/roll_op.cu
+++ b/paddle/fluid/operators/roll_op.cu
@@ -36,13 +36,16 @@ __global__ void RollCudaKernel(const T* input, T* output, int64_t N,
   }
 
   int64_t output_idx = idx;
-  int64_t dim_idx, dim_idx_shift;
+  int64_t new_dim_idx = 0;
 
-#pragma unroll Rank
+#pragma unroll
   for (size_t i = 0; i < Rank; i++) {
-    dim_idx = (idx / strides[i]) % sizes[i];
-    dim_idx_shift = (dim_idx + shifts[i]) % sizes[i];
-    output_idx = output_idx + (dim_idx_shift - dim_idx) * strides[i];
+    new_dim_idx = (idx / strides[i]) % sizes[i] + shifts[i];
+    if (new_dim_idx >= sizes[i]) {
+      output_idx += (shifts[i] - sizes[i]) * strides[i];
+    } else {
+      output_idx += shifts[i] * strides[i];
+    }
   }
   output[output_idx] = input[idx];
 }
-- 
GitLab