From 50df0170c6f3500edcbe703f0bb32beeb9d267c8 Mon Sep 17 00:00:00 2001
From: limingshu <61349199+JamesLim-sy@users.noreply.github.com>
Date: Wed, 15 Mar 2023 17:44:19 +0800
Subject: [PATCH] first commit (#51683)

---
 paddle/phi/backends/gpu/gpu_utils.h              | 6 ++++--
 paddle/phi/kernels/funcs/transpose_function.cu.h | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/paddle/phi/backends/gpu/gpu_utils.h b/paddle/phi/backends/gpu/gpu_utils.h
index ea97a086afc..0bb0aef7be1 100644
--- a/paddle/phi/backends/gpu/gpu_utils.h
+++ b/paddle/phi/backends/gpu/gpu_utils.h
@@ -86,7 +86,8 @@ template <typename IndexType = int>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType
 FlatTensorIndex(const Index3& index, const Dim3& dims) {
   IndexType flat_index = index[0];
-  for (int i = 1; i < 3; i++) {
+#pragma unroll
+  for (int i = 1; i < 3; ++i) {
     flat_index = flat_index * dims[i] + index[i];
   }
   return flat_index;
@@ -97,7 +98,8 @@ template <typename IndexType = int>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index3
 ConvertTensorIndex(IndexType index, const Dim3& dims) {
   Index3 tensor_index;
-  for (int i = 2; i >= 0; i--) {
+#pragma unroll
+  for (int i = 2; i >= 0; --i) {
     IndexType new_index = index / dims[i];
     tensor_index[i] = static_cast<int>(index - dims[i] * new_index);
     index = new_index;
diff --git a/paddle/phi/kernels/funcs/transpose_function.cu.h b/paddle/phi/kernels/funcs/transpose_function.cu.h
index f49a3785622..00a9b528f6f 100644
--- a/paddle/phi/kernels/funcs/transpose_function.cu.h
+++ b/paddle/phi/kernels/funcs/transpose_function.cu.h
@@ -153,7 +153,7 @@ __global__ void TilingSwapDim1And2(const T* __restrict__ input,
   if (x < in_effective_thread_num) {
     // Read a tile from input using block.
     int x_i = x / TileY;
-    int x_j = x % TileY;
+    int x_j = x - x_i * TileY;
     IndexType input_ind =
         input_origin_block_flat_index + x_i * input_dims[2] + x_j;
     IndexType input_inc = BlockReadRows * input_dims[2];
-- 
GitLab