From efa8aded4a3280c5df67b322970c34a05d71f153 Mon Sep 17 00:00:00 2001 From: Arash Bakhtiari Date: Mon, 12 Sep 2022 11:45:32 -0700 Subject: [PATCH] Fix the residual add mp scaling for GPTNeoX (#2310) --- csrc/transformer/inference/csrc/gelu.cu | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/csrc/transformer/inference/csrc/gelu.cu b/csrc/transformer/inference/csrc/gelu.cu index ee952f50..5eb30478 100644 --- a/csrc/transformer/inference/csrc/gelu.cu +++ b/csrc/transformer/inference/csrc/gelu.cu @@ -321,10 +321,10 @@ __global__ void gptj_residual_add(float* input, if (attnbias) { float4 attn_bias = attnbias_cast[offset % intermediate_size]; - data.x += attn_bias.x * mp_scale; - data.y += attn_bias.y * mp_scale; - data.z += attn_bias.z * mp_scale; - data.w += attn_bias.w * mp_scale; + data.x += attn_bias.x; + data.y += attn_bias.y; + data.z += attn_bias.z; + data.w += attn_bias.w; } data.x = out.x + res_vec.x + (data.x + bias_data.x) * mp_scale; data.y = out.y + res_vec.y + (data.y + bias_data.y) * mp_scale; @@ -383,10 +383,10 @@ __global__ void gptj_residual_add(__half* input, __half2* attnbias_half = reinterpret_cast<__half2*>(&attn_bias_vec); float2 attn_low_bias = __half22float2(attnbias_half[0]); float2 attn_high_bias = __half22float2(attnbias_half[1]); - low_data.x += attn_low_bias.x * mp_scale; - low_data.y += attn_low_bias.y * mp_scale; - high_data.x += attn_high_bias.x * mp_scale; - high_data.y += attn_high_bias.y * mp_scale; + low_data.x += attn_low_bias.x; + low_data.y += attn_low_bias.y; + high_data.x += attn_high_bias.x; + high_data.y += attn_high_bias.y; } low_data.x = low_res.x + low_out.x + (low_data.x + low_bias.x) * mp_scale; -- GitLab