提交 46fc14c2 编写于 作者: C caoying03

update the text generation demo.

上级 4f0d8acf
......@@ -91,11 +91,11 @@ PaddleBook中[机器翻译](https://github.com/PaddlePaddle/book/blob/develop/08
```python
#### Decoder
encoder_last = paddle.layer.last_seq(input=encoded_vector)
with paddle.layer.mixed(
encoder_last_projected = paddle.layer.mixed(
size=decoder_size,
act=paddle.activation.Tanh()) as encoder_last_projected:
encoder_last_projected += paddle.layer.full_matrix_projection(
input=encoder_last)
act=paddle.activation.Tanh(),
input=paddle.layer.full_matrix_projection(input=encoder_last))
# gru step
def gru_decoder_without_attention(enc_vec, current_word):
'''
......@@ -112,10 +112,12 @@ def gru_decoder_without_attention(enc_vec, current_word):
context = paddle.layer.last_seq(input=enc_vec)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs +=paddle.layer.full_matrix_projection(input=context)
decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
decoder_inputs = paddle.layer.mixed(
size=decoder_size * 3,
input=[
paddle.layer.full_matrix_projection(input=context),
paddle.layer.full_matrix_projection(input=current_word)
])
gru_step = paddle.layer.gru_step(
name='gru_decoder',
......@@ -125,24 +127,24 @@ def gru_decoder_without_attention(enc_vec, current_word):
output_mem=decoder_mem,
size=decoder_size)
with paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
out = paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax(),
input=paddle.layer.full_matrix_projection(input=gru_step))
return out
```
在模型训练和测试阶段,解码器的行为有很大的不同:
- **训练阶段**:目标翻译结果的词向量`trg_embedding`作为参数传递给单步逻辑`gru_decoder_without_attention()`,函数`recurrent_group()`循环调用单步逻辑执行,最后计算目标翻译与实际解码的差异cost并返回;
- **测试阶段**:解码器根据最后一个生成的词预测下一个词,`GeneratedInputV2()`自动取出模型预测出的概率最高的$k$个词的词向量传递给单步逻辑,`beam_search()`函数调用单步逻辑函数`gru_decoder_without_attention()`完成柱搜索并作为结果返回。
- **测试阶段**:解码器根据最后一个生成的词预测下一个词,`GeneratedInput()`自动取出模型预测出的概率最高的$k$个词的词向量传递给单步逻辑,`beam_search()`函数调用单步逻辑函数`gru_decoder_without_attention()`完成柱搜索并作为结果返回。
训练和生成的逻辑分别实现在如下的`if-else`条件分支中:
```python
decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True)
group_inputs = [group_input1]
if not generating:
trg_embedding = paddle.layer.embedding(
......@@ -166,7 +168,7 @@ if not generating:
return cost
else:
trg_embedding = paddle.layer.GeneratedInputV2(
trg_embedding = paddle.layer.GeneratedInput(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
......
......@@ -133,11 +133,11 @@ PaddleBook中[机器翻译](https://github.com/PaddlePaddle/book/blob/develop/08
```python
#### Decoder
encoder_last = paddle.layer.last_seq(input=encoded_vector)
with paddle.layer.mixed(
encoder_last_projected = paddle.layer.mixed(
size=decoder_size,
act=paddle.activation.Tanh()) as encoder_last_projected:
encoder_last_projected += paddle.layer.full_matrix_projection(
input=encoder_last)
act=paddle.activation.Tanh(),
input=paddle.layer.full_matrix_projection(input=encoder_last))
# gru step
def gru_decoder_without_attention(enc_vec, current_word):
'''
......@@ -154,10 +154,12 @@ def gru_decoder_without_attention(enc_vec, current_word):
context = paddle.layer.last_seq(input=enc_vec)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs +=paddle.layer.full_matrix_projection(input=context)
decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
decoder_inputs = paddle.layer.mixed(
size=decoder_size * 3,
input=[
paddle.layer.full_matrix_projection(input=context),
paddle.layer.full_matrix_projection(input=current_word)
])
gru_step = paddle.layer.gru_step(
name='gru_decoder',
......@@ -167,24 +169,24 @@ def gru_decoder_without_attention(enc_vec, current_word):
output_mem=decoder_mem,
size=decoder_size)
with paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
out = paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax(),
input=paddle.layer.full_matrix_projection(input=gru_step))
return out
```
在模型训练和测试阶段,解码器的行为有很大的不同:
- **训练阶段**:目标翻译结果的词向量`trg_embedding`作为参数传递给单步逻辑`gru_decoder_without_attention()`,函数`recurrent_group()`循环调用单步逻辑执行,最后计算目标翻译与实际解码的差异cost并返回;
- **测试阶段**:解码器根据最后一个生成的词预测下一个词,`GeneratedInputV2()`自动取出模型预测出的概率最高的$k$个词的词向量传递给单步逻辑,`beam_search()`函数调用单步逻辑函数`gru_decoder_without_attention()`完成柱搜索并作为结果返回。
- **测试阶段**:解码器根据最后一个生成的词预测下一个词,`GeneratedInput()`自动取出模型预测出的概率最高的$k$个词的词向量传递给单步逻辑,`beam_search()`函数调用单步逻辑函数`gru_decoder_without_attention()`完成柱搜索并作为结果返回。
训练和生成的逻辑分别实现在如下的`if-else`条件分支中:
```python
decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True)
group_inputs = [group_input1]
if not generating:
trg_embedding = paddle.layer.embedding(
......@@ -208,7 +210,7 @@ if not generating:
return cost
else:
trg_embedding = paddle.layer.GeneratedInputV2(
trg_embedding = paddle.layer.GeneratedInput(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
......
......@@ -16,7 +16,7 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False):
'''
Define the network structure of NMT, including encoder and decoder.
:param source_dict_dim: size of source dictionary
:param source_dict_dim: size of source dictionary
:type source_dict_dim: int
:param target_dict_dim: size of target dictionary
:type target_dict_dim: int
......@@ -41,11 +41,11 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False):
return_seq=True)
#### Decoder
encoder_last = paddle.layer.last_seq(input=encoded_vector)
with paddle.layer.mixed(
size=decoder_size,
act=paddle.activation.Tanh()) as encoder_last_projected:
encoder_last_projected += paddle.layer.full_matrix_projection(
input=encoder_last)
encoder_last_projected = paddle.layer.mixed(
size=decoder_size,
act=paddle.activation.Tanh(),
input=paddle.layer.full_matrix_projection(input=encoder_last))
# gru step
def gru_decoder_without_attention(enc_vec, current_word):
'''
......@@ -63,10 +63,12 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False):
context = paddle.layer.last_seq(input=enc_vec)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += paddle.layer.full_matrix_projection(input=context)
decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
decoder_inputs = paddle.layer.mixed(
size=decoder_size * 3,
input=[
paddle.layer.full_matrix_projection(input=context),
paddle.layer.full_matrix_projection(input=current_word)
])
gru_step = paddle.layer.gru_step(
name='gru_decoder',
......@@ -76,15 +78,15 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False):
output_mem=decoder_mem,
size=decoder_size)
with paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
out = paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax(),
input=paddle.layer.full_matrix_projection(input=gru_step))
return out
decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True)
group_inputs = [group_input1]
if not generating:
......@@ -109,7 +111,7 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False):
return cost
else:
trg_embedding = paddle.layer.GeneratedInputV2(
trg_embedding = paddle.layer.GeneratedInput(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
......@@ -194,7 +196,7 @@ def generate(source_dict_dim, target_dict_dim, init_models_path):
beam_gen = seq2seq_net(source_dict_dim, target_dict_dim, True)
with gzip.open(init_models_path) as f:
parameters = paddle.parameters.Parameters.from_tar(f)
# prob is the prediction probabilities, and id is the prediction word.
# prob is the prediction probabilities, and id is the prediction word.
beam_result = paddle.infer(
output_layer=beam_gen,
parameters=parameters,
......@@ -244,10 +246,10 @@ def main():
target_language_dict_dim = 30000
if generating:
# should pass the right generated model's path here
# modify this path to specify a trained model.
init_models_path = 'models/nmt_without_att_params_batch_1800.tar.gz'
if not os.path.exists(init_models_path):
print "Cannot find models for generation"
print "trained model cannot be found."
exit(1)
generate(source_language_dict_dim, target_language_dict_dim,
init_models_path)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册