Fix the residual add mp scaling for GPTNeoX (#2310)

efa8aded · Arash Bakhtiari · GitHub · a691ec60 · efa8aded
显示空白变更内容
内联并排

Showing 1 changed file with 8 additions and 8 deletions

csrc/transformer/inference/csrc/gelu.cu csrc/transformer/inference/csrc/gelu.cu +8 -8

未找到文件。
--- a/csrc/transformer/inference/csrc/gelu.cu
+++ b/csrc/transformer/inference/csrc/gelu.cu
@@ -321,10 +321,10 @@ __global__ void gptj_residual_add(float* input,

        if (attnbias) {
            float4 attn_bias = attnbias_cast[offset % intermediate_size];
-            data.x += attn_bias.x * mp_scale;
-            data.y += attn_bias.y * mp_scale;
-            data.z += attn_bias.z * mp_scale;
-            data.w += attn_bias.w * mp_scale;
+            data.x += attn_bias.x;
+            data.y += attn_bias.y;
+            data.z += attn_bias.z;
+            data.w += attn_bias.w;
        }
        data.x = out.x + res_vec.x + (data.x + bias_data.x) * mp_scale;
        data.y = out.y + res_vec.y + (data.y + bias_data.y) * mp_scale;
@@ -383,10 +383,10 @@ __global__ void gptj_residual_add(__half* input,
            __half2* attnbias_half = reinterpret_cast<__half2*>(&attn_bias_vec);
            float2 attn_low_bias = __half22float2(attnbias_half[0]);
            float2 attn_high_bias = __half22float2(attnbias_half[1]);
-            low_data.x += attn_low_bias.x * mp_scale;
-            low_data.y += attn_low_bias.y * mp_scale;
-            high_data.x += attn_high_bias.x * mp_scale;
-            high_data.y += attn_high_bias.y * mp_scale;
+            low_data.x += attn_low_bias.x;
+            low_data.y += attn_low_bias.y;
+            high_data.x += attn_high_bias.x;
+            high_data.y += attn_high_bias.y;
        }

        low_data.x = low_res.x + low_out.x + (low_data.x + low_bias.x) * mp_scale;