未验证 提交 145c3a75 编写于 作者: Connor Holmes 提交者: GitHub

Fix missing scale attributes for GPTJ (#3256)

Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
Co-authored-by: Michael Wyatt <michaelwyatt@microsoft.com>
上级 ad168a69
......@@ -462,9 +462,9 @@ std::vector<at::Tensor> ds_softmax_context(at::Tensor& query_key_value,
T* workspace = (T*)InferenceContext::Instance().GetWorkSpace();
size_t buf_size = bsz * seq_len * hidden_dim;
auto output = torch::from_blob(workspace + 3 * buf_size, {bsz, seq_len, hidden_dim}, options);
auto output = torch::from_blob(workspace + 4 * buf_size, {bsz, seq_len, hidden_dim}, options);
auto query_cont = workspace + 4 * buf_size;
auto query_cont = workspace + 5 * buf_size;
size_t offset =
10 * (hidden_dim * bsz * InferenceContext::Instance().GetMaxTokenLenght()) +
layer_id * 2 * bsz * InferenceContext::Instance().GetMaxTokenLenght() * hidden_dim;
......
......@@ -23,7 +23,9 @@ class GELUGemmOp(BaseOp):
bias: torch.Tensor,
weight_out: torch.Tensor,
async_op: bool = False):
output = self.fused_gemm_gelu(input, weight, weight.scale, bias, weight_out, weight_out.scale,
output = self.fused_gemm_gelu(input, weight, weight.scale if hasattr(weight, "scale") else torch.empty(1),
bias, weight_out,
weight_out.scale if hasattr(weight_out, "scale") else torch.empty(1),
self.config.epsilon, self.config.pre_layer_norm, self.config.q_int8, async_op,
self.config.transposed_mode)
return output
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册