From f57d706aa7a02ec35ba3b5d1293adeba253b55a9 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Wed, 5 Sep 2018 03:07:59 +0000
Subject: [PATCH] Use double to reduce

---
 paddle/fluid/operators/layer_norm_op.cu | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/operators/layer_norm_op.cu b/paddle/fluid/operators/layer_norm_op.cu
index 0886c41a1..22343d772 100644
--- a/paddle/fluid/operators/layer_norm_op.cu
+++ b/paddle/fluid/operators/layer_norm_op.cu
@@ -67,27 +67,27 @@ template <typename T, int BlockDim>
 __global__ void LayerNormForward(const T *x, const T *scale, const T *bias,
                                  T *y, T *mean, T *var, float epsilon,
                                  int feature_size) {
-  using BlockReduce = cub::BlockReduce<PairForLayerNorm<T>, BlockDim>;
+  using BlockReduce = cub::BlockReduce<PairForLayerNorm<double>, BlockDim>;
   __shared__ typename BlockReduce::TempStorage temp_storage;
 
   int beg_idx = blockIdx.x * feature_size + threadIdx.x;
   int end_idx = (blockIdx.x + 1) * feature_size;
 
   // Step 1: Reduce to calculate mean and var
-  T mean_val = static_cast<T>(0);
-  T var_val = static_cast<T>(0);
+  double mean_val = 0;
+  double var_val = 0;
   for (int i = beg_idx; i < end_idx; i += BlockDim) {
     T tmp = x[i];
     mean_val += tmp;
     var_val += (tmp * tmp);
   }
   auto pair = BlockReduce(temp_storage)
-                  .Reduce(PairForLayerNorm<T>(mean_val, var_val),
-                          PairForLayerNormAddFunctor<T>());
+                  .Reduce(PairForLayerNorm<double>(mean_val, var_val),
+                          PairForLayerNormAddFunctor<double>());
   if (threadIdx.x == 0) {
     auto tmp = pair.first_ / feature_size;
-    mean[blockIdx.x] = tmp;
-    var[blockIdx.x] = pair.second_ / feature_size - tmp * tmp;
+    mean[blockIdx.x] = static_cast<T>(tmp);
+    var[blockIdx.x] = static_cast<T>(pair.second_ / feature_size - tmp * tmp);
   }
   __syncthreads();
   mean_val = mean[blockIdx.x];
--
GitLab
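
Note: the patch works because both the per-thread partial sums and the CUB
block-wide reduction now accumulate in double even when T is float; the
result is narrowed back to T only at the final store, so rounding error
over long feature rows no longer contaminates the mean and variance. The
standalone kernel below is a minimal sketch of that technique, not the
PaddlePaddle implementation: PairForLayerNorm and PairForLayerNormAddFunctor
are re-sketched here from the names in the diff (their real definitions live
elsewhere in layer_norm_op.cu and may differ), and MeanVarKernel is a
hypothetical name. Only cub::BlockReduce is the actual CUB API.

#include <cub/cub.cuh>

// Sketch of the (sum, sum-of-squares) pair reduced across the block.
template <typename U>
struct PairForLayerNorm {
  U first_;   // running sum of x
  U second_;  // running sum of x * x
  __device__ PairForLayerNorm() : first_(0), second_(0) {}
  __device__ PairForLayerNorm(U f, U s) : first_(f), second_(s) {}
};

// Binary functor CUB uses to combine two partial pairs.
template <typename U>
struct PairForLayerNormAddFunctor {
  __device__ PairForLayerNorm<U> operator()(
      const PairForLayerNorm<U> &a, const PairForLayerNorm<U> &b) const {
    return PairForLayerNorm<U>(a.first_ + b.first_, a.second_ + b.second_);
  }
};

// One block per row: accumulate in double regardless of T, cast back to T
// only when writing the per-row mean and variance.
template <typename T, int BlockDim>
__global__ void MeanVarKernel(const T *x, T *mean, T *var,
                              int feature_size) {
  using BlockReduce = cub::BlockReduce<PairForLayerNorm<double>, BlockDim>;
  __shared__ typename BlockReduce::TempStorage temp_storage;

  int beg_idx = blockIdx.x * feature_size + threadIdx.x;
  int end_idx = (blockIdx.x + 1) * feature_size;

  double sum = 0.0, sq_sum = 0.0;  // double accumulators, as in the patch
  for (int i = beg_idx; i < end_idx; i += BlockDim) {
    double v = static_cast<double>(x[i]);
    sum += v;
    sq_sum += v * v;
  }
  auto pair = BlockReduce(temp_storage)
                  .Reduce(PairForLayerNorm<double>(sum, sq_sum),
                          PairForLayerNormAddFunctor<double>());
  if (threadIdx.x == 0) {
    double m = pair.first_ / feature_size;
    mean[blockIdx.x] = static_cast<T>(m);
    var[blockIdx.x] = static_cast<T>(pair.second_ / feature_size - m * m);
  }
}

// Example launch for float data with 256 threads per block:
//   MeanVarKernel<float, 256><<<num_rows, 256>>>(d_x, d_mean, d_var, 1024);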