Commit a7ef0aef authored by songyouwei, committed by hong

access public Parameters instead (#4159)

test=develop
Parent 2934b0be
......
@@ -405,7 +405,7 @@ class UnifiedTransformer(ModelBase):
         if self.two_layer_predictor:
             dec_embed = self.pre_predictor(dec_embed)
         if self.weight_sharing:
-            token_embedding = self.embedder.token_embedding._w
+            token_embedding = self.embedder.token_embedding.weight
             dec_logits = layers.matmul(
                 x=dec_embed,
                 y=token_embedding,
......
@@ -648,7 +648,7 @@ class UnifiedTransformer(ModelBase):
         if self.two_layer_predictor:
             pred_embed = self.pre_predictor(pred_embed)
         if self.weight_sharing:
-            token_embedding = self.embedder.token_embedding._w
+            token_embedding = self.embedder.token_embedding.weight
             pred_logits = layers.matmul(
                 x=pred_embed,
                 y=token_embedding,
......
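
The two hunks above follow one pattern: read the embedding table through the layer's public `weight` Parameter instead of the private `_w` attribute, then reuse it as the output projection. A minimal sketch of that pattern, assuming a Paddle 1.6-style dygraph `Embedding` (shapes and variable names here are illustrative, not taken from this commit):

    import numpy as np
    import paddle.fluid.dygraph as dg
    from paddle.fluid import layers

    with dg.guard():
        # Stand-in for `self.embedder.token_embedding` in the diff above.
        token_embedding = dg.Embedding(size=[1000, 32])    # [vocab_size, hidden_size]

        # Fake decoder features of shape [batch * seq_len, hidden_size].
        dec_embed = dg.to_variable(np.random.rand(6, 32).astype("float32"))

        # Weight sharing: project back onto the vocabulary with the embedding table,
        # accessed via the public `weight` Parameter rather than the private `_w`.
        dec_logits = layers.matmul(x=dec_embed,
                                   y=token_embedding.weight,
                                   transpose_y=True)        # [batch * seq_len, vocab_size]
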
......
@@ -384,7 +384,7 @@ class PositionEmbedding(dg.Layer):
             out (Variable): Shape(B, C_pos), position embedding, where C_pos
                 means position embedding size.
         """
-        rad = fluid.layers.transpose(self.embed._w, perm=[1, 0])
+        rad = fluid.layers.transpose(self.embed.weight, perm=[1, 0])
         batch_size = indices.shape[0]
         if speaker_position_rate is None:
......
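
The `PositionEmbedding` change is the same substitution for a read-only use of the table: `self.embed.weight` is an ordinary Parameter, so it can be inspected or transposed directly. A rough sketch under the same assumptions (layer sizes are made up for illustration):

    import paddle.fluid as fluid
    import paddle.fluid.dygraph as dg

    with dg.guard():
        # Stand-in for `self.embed` in PositionEmbedding.
        embed = dg.Embedding(size=[256, 64])       # [n_positions, embed_dim]

        # The lookup table is exposed as a public Parameter.
        print(embed.weight.shape)                  # [256, 64]

        # Same operation as in the diff: transpose to [embed_dim, n_positions].
        rad = fluid.layers.transpose(embed.weight, perm=[1, 0])
        print(rad.shape)                           # [64, 256]
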
......
@@ -640,7 +640,7 @@ class WrapDecoderLayer(Layer):
         if self._weight_sharing:
             predict = layers.matmul(x=dec_output_reshape,
-                                    y=self._prepare_decoder_layer._input_emb._w,
+                                    y=self._prepare_decoder_layer._input_emb.weight,
                                     transpose_y=True)
         else:
             predict = self._fc(dec_output_reshape)
......
@@ -693,7 +693,7 @@ class TransFormer(Layer):
                                              weight_sharing)
         if weight_sharing:
-            self._wrap_decoder_layer._prepare_decoder_layer._input_emb._w = self._wrap_encoder_layer._prepare_encoder_layer._input_emb._w
+            self._wrap_decoder_layer._prepare_decoder_layer._input_emb.weight = self._wrap_encoder_layer._prepare_encoder_layer._input_emb.weight
         self.n_layer = n_layer
         self.n_head = n_head
......
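
The `TransFormer` hunk ties the encoder and decoder input embeddings by assigning one layer's public `weight` Parameter to the other. Reduced to its essentials, and with illustrative layer names instead of the ones in the repository, the pattern looks roughly like this:

    import paddle.fluid.dygraph as dg

    with dg.guard():
        # Two embedding layers that should share a single table,
        # e.g. encoder and decoder input embeddings.
        enc_input_emb = dg.Embedding(size=[1000, 32])
        dec_input_emb = dg.Embedding(size=[1000, 32])

        # Weight sharing via the public Parameter, as in the diff: after the
        # assignment both layers read and update the same underlying table.
        dec_input_emb.weight = enc_input_emb.weight
        assert dec_input_emb.weight is enc_input_emb.weight
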