From 5acd764d344e72c9b6a8c7e09ce18bf105bee301 Mon Sep 17 00:00:00 2001
From: Zhang Zheng <32410583+ZzSean@users.noreply.github.com>
Date: Fri, 6 May 2022 16:00:55 +0800
Subject: [PATCH] Fix the implementation of fused_fast_ln_fwd_kernel in test mode (#42527)

---
 .../fused/fused_layernorm_residual_dropout_bias.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h
index aa613dd3f5..866de8e04a 100644
--- a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h
+++ b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h
@@ -298,10 +298,16 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void fused_fast_ln_fwd_kernel(
     for (int it = 0, col = c; it < LDGS; it++) {
       phi::Store(
           x[it], residual_out_ptr + row * ELTS_PER_ROW + col * VecSize);
-      phi::Store(
-          mask_vec[it], mask_out_ptr + row * ELTS_PER_ROW + col * VecSize);
       col += THREADS_PER_ROW;
     }
+    if (!is_test) {
+#pragma unroll
+      for (int it = 0, col = c; it < LDGS; it++) {
+        phi::Store(
+            mask_vec[it], mask_out_ptr + row * ELTS_PER_ROW + col * VecSize);
+        col += THREADS_PER_ROW;
+      }
+    }
 
     U mu_local = 0.f;
 #pragma unroll
-- 
GitLab