Commit 63022e66 authored by Varuna Jayasiri

fix

Parent d7af20a6
@@ -521,18 +521,18 @@ class LayerGenerator:
         #
         with monit.section('Convert to int8'):
-            layer.attention.output = make_llm_int8_linear(layer.attention.output,
-                                                          device=self.device,
-                                                          threshold=self.llm_int8_threshold)
-            layer.attention.qkv_lin = make_llm_int8_linear(layer.attention.qkv_lin,
-                                                           device=self.device,
-                                                           threshold=self.llm_int8_threshold)
-            layer.ffn.dense_h_h4 = make_llm_int8_linear(layer.ffn.dense_h_h4,
-                                                        device=self.device,
-                                                        threshold=self.llm_int8_threshold)
-            layer.ffn.dense_h4_h = make_llm_int8_linear(layer.ffn.dense_h4_h,
-                                                        device=self.device,
-                                                        threshold=self.llm_int8_threshold)
+            layer.attention._modules['output'] = make_llm_int8_linear(layer.attention.output,
+                                                                      device=self.device,
+                                                                      threshold=self.llm_int8_threshold)
+            layer.attention._modules['qkv_lin'] = make_llm_int8_linear(layer.attention.qkv_lin,
+                                                                       device=self.device,
+                                                                       threshold=self.llm_int8_threshold)
+            layer.ffn._modules['dense_h_h4'] = make_llm_int8_linear(layer.ffn.dense_h_h4,
+                                                                    device=self.device,
+                                                                    threshold=self.llm_int8_threshold)
+            layer.ffn._modules['dense_h4_h'] = make_llm_int8_linear(layer.ffn.dense_h4_h,
+                                                                    device=self.device,
+                                                                    threshold=self.llm_int8_threshold)
         #
         return layer
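The commit replaces plain attribute assignment (`layer.attention.output = ...`) with explicit writes into the module registry (`layer.attention._modules['output'] = ...`). On a plain `nn.Module`, `__setattr__` also routes module-valued attributes into `_modules`, so the explicit form guarantees the converted layer replaces the registered submodule regardless of how the attribute was originally set up. For context, below is a minimal sketch of what a helper like `make_llm_int8_linear` could look like on top of the bitsandbytes `Linear8bitLt`/`Int8Params` API; the helper itself is not part of this commit, so the body is an assumption, not the repository's actual implementation.

```python
# Hypothetical sketch (not part of this commit): wrap an nn.Linear with a
# bitsandbytes LLM.int8() linear layer. The real make_llm_int8_linear used by
# LayerGenerator may differ.
import torch
from torch import nn
from bitsandbytes.nn import Linear8bitLt, Int8Params


def make_llm_int8_linear(linear: nn.Linear, *, device: torch.device,
                         threshold: float = 6.0) -> nn.Module:
    """Return an 8-bit replacement for `linear` (illustrative only)."""
    int8_lin = Linear8bitLt(
        linear.in_features,
        linear.out_features,
        bias=linear.bias is not None,
        has_fp16_weights=False,  # keep weights in int8; inference-only
        threshold=threshold,     # outlier threshold from the LLM.int8() paper
    )
    # Wrap the existing weights; quantization happens when the parameter is
    # moved to a CUDA device.
    int8_lin._parameters['weight'] = Int8Params(linear.weight.data.cpu(),
                                                requires_grad=False,
                                                has_fp16_weights=False).to(device)
    if linear.bias is not None:
        int8_lin._parameters['bias'] = nn.Parameter(linear.bias.data.to(device),
                                                    requires_grad=False)
    return int8_lin
```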