From dca56f47f56f0b0ea04176531bb131de00235394 Mon Sep 17 00:00:00 2001
From: Zhong Hui
Date: Wed, 5 Aug 2020 21:53:14 +0800
Subject: [PATCH] fix invalid read of pnorm gradient function

fix invalid read of pnorm gradient function and delete the unused code
---
 paddle/fluid/operators/p_norm_op.cu | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/paddle/fluid/operators/p_norm_op.cu b/paddle/fluid/operators/p_norm_op.cu
index a0b3df6d6e7..d9ac98ff880 100644
--- a/paddle/fluid/operators/p_norm_op.cu
+++ b/paddle/fluid/operators/p_norm_op.cu
@@ -99,39 +99,25 @@ __global__ void PnormGradient(const T* x, const T* x_norm, const T* y_grad,
                               const float porder, const int pre,
                               const int axis_n, const int post, const T eps,
                               T* x_grad) {
-  typedef cub::BlockReduce<T, BlockDim> BlockReduce;
-  __shared__ typename BlockReduce::TempStorage temp_storage_sum;
   // dx = (x/pnorm_broadcast).pow(p-1) * norm_dy.broadcast * sign(x)
   int num = pre * post;
   auto porder_grad = static_cast<T>(porder - 1.0f);
   for (int i = blockIdx.x; i < num; i += gridDim.x) {
-    T sum = 0.0;
-    __shared__ T row_sum;
-    __shared__ T row_sqrt_norm;
-    __shared__ T row_norm;
+    __shared__ T pnorm_i;
+    __shared__ T yout_i;
 
     auto base = (i / post) * post * axis_n + (i % post);
 
-    for (int j = threadIdx.x; j < axis_n; j += blockDim.x) {
-      int index = base + j * post;
-      sum += x[index] * y_grad[index];
-    }
-    T reduce_result = BlockReduce(temp_storage_sum).Sum(sum);
     if (threadIdx.x == 0) {
-      row_sum = reduce_result;
-      row_sqrt_norm = x_norm[i];
-      row_norm = row_sqrt_norm * row_sqrt_norm;
+      pnorm_i = x_norm[i];
+      yout_i = y_grad[i];
     }
-    __syncthreads();
-
-    const T pnorm_i = x_norm[i];
-    const T yout_i = y_grad[i];
+    __syncthreads();
 
     for (int j = threadIdx.x; j < axis_n; j += blockDim.x) {
       int index = base + j * post;
       const T x_ij = inline_abs(x[index]);
-      const T dy_ij = y_grad[index];
       x_grad[index] = inline_pow(x_ij, porder_grad) /
                       (inline_pow(pnorm_i, porder_grad) + eps) * yout_i *
                       inline_sign(x[index]);
--
GitLab
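
Editor's note on what the patch fixes: y_grad is the gradient of the norm output, so it holds only pre * post elements, one per reduced row. The deleted reduction loop indexed it with index = base + j * post, which ranges over the full input tensor of pre * axis_n * post elements; that out-of-bounds access is the invalid read named in the subject. The computed row_sum, row_sqrt_norm, and row_norm were never consumed afterwards, which is the unused code the commit also deletes. In the surviving kernel, thread 0 reads the two per-row scalars once into shared memory and every thread indexes the small arrays only with i.

The standalone toy kernel below is a minimal sketch of that corrected access pattern, not PaddlePaddle code; every name in it (toy_pnorm_grad, num_rows, row_len, and the p = 2 setup in main) is illustrative only. It hard-codes float and a contiguous row layout (post == 1 in the patch's terms) to keep the sketch short.

// toy_pnorm_grad.cu -- illustrative sketch only; all names are hypothetical.
#include <cstdio>
#include <cmath>
#include <cuda_runtime.h>

// One block per row. The per-row scalars (row norm, upstream gradient) are
// read once by thread 0; indexing them with a per-element offset, as the
// deleted loop did with y_grad[index], would read past the end of the array.
__global__ void toy_pnorm_grad(const float* x, const float* x_norm,
                               const float* y_grad, float porder, float eps,
                               int num_rows, int row_len, float* x_grad) {
  __shared__ float pnorm_i;
  __shared__ float yout_i;
  for (int i = blockIdx.x; i < num_rows; i += gridDim.x) {
    if (threadIdx.x == 0) {
      pnorm_i = x_norm[i];  // valid: x_norm and y_grad have num_rows elements
      yout_i = y_grad[i];
    }
    __syncthreads();  // publish pnorm_i / yout_i to the whole block
    for (int j = threadIdx.x; j < row_len; j += blockDim.x) {
      int idx = i * row_len + j;
      float a = fabsf(x[idx]);
      float sgn = (x[idx] > 0.0f) - (x[idx] < 0.0f);
      // dx = |x|^(p-1) / (norm^(p-1) + eps) * dy * sign(x)
      x_grad[idx] = powf(a, porder - 1.0f) /
                    (powf(pnorm_i, porder - 1.0f) + eps) * yout_i * sgn;
    }
    __syncthreads();  // keep the next row from overwriting scalars in use
  }
}

int main() {
  const int rows = 2, len = 4, n = rows * len;
  float hx[n], hnorm[rows], hdy[rows], hdx[n];
  for (int k = 0; k < n; ++k) hx[k] = 1.0f + k;
  for (int r = 0; r < rows; ++r) {
    float s = 0.0f;
    for (int j = 0; j < len; ++j) s += hx[r * len + j] * hx[r * len + j];
    hnorm[r] = sqrtf(s);  // p = 2 norm of row r
    hdy[r] = 1.0f;        // upstream gradient of the norm output
  }
  float *dx, *dnorm, *ddy, *ddx;
  cudaMalloc(&dx, n * sizeof(float));
  cudaMalloc(&dnorm, rows * sizeof(float));
  cudaMalloc(&ddy, rows * sizeof(float));
  cudaMalloc(&ddx, n * sizeof(float));
  cudaMemcpy(dx, hx, n * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(dnorm, hnorm, rows * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(ddy, hdy, rows * sizeof(float), cudaMemcpyHostToDevice);
  toy_pnorm_grad<<<rows, 64>>>(dx, dnorm, ddy, 2.0f, 1e-12f, rows, len, ddx);
  cudaMemcpy(hdx, ddx, n * sizeof(float), cudaMemcpyDeviceToHost);
  for (int k = 0; k < n; ++k) printf("dx[%d] = %f\n", k, hdx[k]);
  cudaFree(dx); cudaFree(dnorm); cudaFree(ddy); cudaFree(ddx);
  return 0;
}

Built with, e.g., nvcc -o toy toy_pnorm_grad.cu: each block handles one row, thread 0 publishes the row's norm and upstream gradient through shared memory, __syncthreads() makes them visible, and for p = 2 the printed result reduces to x / ||x||_2, the expected L2-norm gradient.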