From efa8aded4a3280c5df67b322970c34a05d71f153 Mon Sep 17 00:00:00 2001
From: Arash Bakhtiari <arash@bakhtiari.org>
Date: Mon, 12 Sep 2022 11:45:32 -0700
Subject: [PATCH] Fix the residual add mp scaling for GPTNeoX (#2310)

---
 csrc/transformer/inference/csrc/gelu.cu | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/csrc/transformer/inference/csrc/gelu.cu b/csrc/transformer/inference/csrc/gelu.cu
index ee952f50..5eb30478 100644
--- a/csrc/transformer/inference/csrc/gelu.cu
+++ b/csrc/transformer/inference/csrc/gelu.cu
@@ -321,10 +321,10 @@ __global__ void gptj_residual_add(float* input,
 
         if (attnbias) {
             float4 attn_bias = attnbias_cast[offset % intermediate_size];
-            data.x += attn_bias.x * mp_scale;
-            data.y += attn_bias.y * mp_scale;
-            data.z += attn_bias.z * mp_scale;
-            data.w += attn_bias.w * mp_scale;
+            data.x += attn_bias.x;
+            data.y += attn_bias.y;
+            data.z += attn_bias.z;
+            data.w += attn_bias.w;
         }
         data.x = out.x + res_vec.x + (data.x + bias_data.x) * mp_scale;
         data.y = out.y + res_vec.y + (data.y + bias_data.y) * mp_scale;
@@ -383,10 +383,10 @@ __global__ void gptj_residual_add(__half* input,
             __half2* attnbias_half = reinterpret_cast<__half2*>(&attn_bias_vec);
             float2 attn_low_bias = __half22float2(attnbias_half[0]);
             float2 attn_high_bias = __half22float2(attnbias_half[1]);
-            low_data.x += attn_low_bias.x * mp_scale;
-            low_data.y += attn_low_bias.y * mp_scale;
-            high_data.x += attn_high_bias.x * mp_scale;
-            high_data.y += attn_high_bias.y * mp_scale;
+            low_data.x += attn_low_bias.x;
+            low_data.y += attn_low_bias.y;
+            high_data.x += attn_high_bias.x;
+            high_data.y += attn_high_bias.y;
         }
 
         low_data.x = low_res.x + low_out.x + (low_data.x + low_bias.x) * mp_scale;
-- 
GitLab