Commit 63022e66 authored by Varuna Jayasiri

fix

Parent d7af20a6
@@ -521,18 +521,18 @@ class LayerGenerator:
         #
         with monit.section('Convert to int8'):
-            layer.attention.output = make_llm_int8_linear(layer.attention.output,
-                                                          device=self.device,
-                                                          threshold=self.llm_int8_threshold)
-            layer.attention.qkv_lin = make_llm_int8_linear(layer.attention.qkv_lin,
-                                                           device=self.device,
-                                                           threshold=self.llm_int8_threshold)
-            layer.ffn.dense_h_h4 = make_llm_int8_linear(layer.ffn.dense_h_h4,
-                                                        device=self.device,
-                                                        threshold=self.llm_int8_threshold)
-            layer.ffn.dense_h4_h = make_llm_int8_linear(layer.ffn.dense_h4_h,
-                                                        device=self.device,
-                                                        threshold=self.llm_int8_threshold)
+            layer.attention._modules['output'] = make_llm_int8_linear(layer.attention.output,
+                                                                      device=self.device,
+                                                                      threshold=self.llm_int8_threshold)
+            layer.attention._modules['qkv_lin'] = make_llm_int8_linear(layer.attention.qkv_lin,
+                                                                       device=self.device,
+                                                                       threshold=self.llm_int8_threshold)
+            layer.ffn._modules['dense_h_h4'] = make_llm_int8_linear(layer.ffn.dense_h_h4,
+                                                                    device=self.device,
+                                                                    threshold=self.llm_int8_threshold)
+            layer.ffn._modules['dense_h4_h'] = make_llm_int8_linear(layer.ffn.dense_h4_h,
+                                                                    device=self.device,
+                                                                    threshold=self.llm_int8_threshold)
         #
         return layer
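The commit replaces plain attribute assignment (`layer.attention.output = ...`) with explicit writes into the module registry (`layer.attention._modules['output'] = ...`). On a plain `nn.Module`, `__setattr__` also routes module-valued attributes into `_modules`, so the explicit form guarantees the converted layer replaces the registered submodule regardless of how the attribute was originally set up. For context, below is a minimal sketch of what a helper like `make_llm_int8_linear` could look like on top of the bitsandbytes `Linear8bitLt`/`Int8Params` API; the helper itself is not part of this commit, so the body is an assumption, not the repository's actual implementation.

```python
# Hypothetical sketch (not part of this commit): wrap an nn.Linear with a
# bitsandbytes LLM.int8() linear layer. The real make_llm_int8_linear used by
# LayerGenerator may differ.
import torch
from torch import nn
from bitsandbytes.nn import Linear8bitLt, Int8Params


def make_llm_int8_linear(linear: nn.Linear, *, device: torch.device,
                         threshold: float = 6.0) -> nn.Module:
    """Return an 8-bit replacement for `linear` (illustrative only)."""
    int8_lin = Linear8bitLt(
        linear.in_features,
        linear.out_features,
        bias=linear.bias is not None,
        has_fp16_weights=False,  # keep weights in int8; inference-only
        threshold=threshold,     # outlier threshold from the LLM.int8() paper
    )
    # Wrap the existing weights; quantization happens when the parameter is
    # moved to a CUDA device.
    int8_lin._parameters['weight'] = Int8Params(linear.weight.data.cpu(),
                                                requires_grad=False,
                                                has_fp16_weights=False).to(device)
    if linear.bias is not None:
        int8_lin._parameters['bias'] = nn.Parameter(linear.bias.data.to(device),
                                                    requires_grad=False)
    return int8_lin
```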