提交 46fc14c2 编写于 作者: C caoying03

update the text generation demo.

上级 4f0d8acf
...@@ -91,11 +91,11 @@ PaddleBook中[机器翻译](https://github.com/PaddlePaddle/book/blob/develop/08 ...@@ -91,11 +91,11 @@ PaddleBook中[机器翻译](https://github.com/PaddlePaddle/book/blob/develop/08
```python ```python
#### Decoder #### Decoder
encoder_last = paddle.layer.last_seq(input=encoded_vector) encoder_last = paddle.layer.last_seq(input=encoded_vector)
with paddle.layer.mixed( encoder_last_projected = paddle.layer.mixed(
size=decoder_size, size=decoder_size,
act=paddle.activation.Tanh()) as encoder_last_projected: act=paddle.activation.Tanh(),
encoder_last_projected += paddle.layer.full_matrix_projection( input=paddle.layer.full_matrix_projection(input=encoder_last))
input=encoder_last)
# gru step # gru step
def gru_decoder_without_attention(enc_vec, current_word): def gru_decoder_without_attention(enc_vec, current_word):
''' '''
...@@ -112,10 +112,12 @@ def gru_decoder_without_attention(enc_vec, current_word): ...@@ -112,10 +112,12 @@ def gru_decoder_without_attention(enc_vec, current_word):
context = paddle.layer.last_seq(input=enc_vec) context = paddle.layer.last_seq(input=enc_vec)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: decoder_inputs = paddle.layer.mixed(
decoder_inputs +=paddle.layer.full_matrix_projection(input=context) size=decoder_size * 3,
decoder_inputs += paddle.layer.full_matrix_projection( input=[
input=current_word) paddle.layer.full_matrix_projection(input=context),
paddle.layer.full_matrix_projection(input=current_word)
])
gru_step = paddle.layer.gru_step( gru_step = paddle.layer.gru_step(
name='gru_decoder', name='gru_decoder',
...@@ -125,24 +127,24 @@ def gru_decoder_without_attention(enc_vec, current_word): ...@@ -125,24 +127,24 @@ def gru_decoder_without_attention(enc_vec, current_word):
output_mem=decoder_mem, output_mem=decoder_mem,
size=decoder_size) size=decoder_size)
with paddle.layer.mixed( out = paddle.layer.mixed(
size=target_dict_dim, size=target_dict_dim,
bias_attr=True, bias_attr=True,
act=paddle.activation.Softmax()) as out: act=paddle.activation.Softmax(),
out += paddle.layer.full_matrix_projection(input=gru_step) input=paddle.layer.full_matrix_projection(input=gru_step))
return out return out
``` ```
在模型训练和测试阶段,解码器的行为有很大的不同: 在模型训练和测试阶段,解码器的行为有很大的不同:
- **训练阶段**:目标翻译结果的词向量`trg_embedding`作为参数传递给单步逻辑`gru_decoder_without_attention()`,函数`recurrent_group()`循环调用单步逻辑执行,最后计算目标翻译与实际解码的差异cost并返回; - **训练阶段**:目标翻译结果的词向量`trg_embedding`作为参数传递给单步逻辑`gru_decoder_without_attention()`,函数`recurrent_group()`循环调用单步逻辑执行,最后计算目标翻译与实际解码的差异cost并返回;
- **测试阶段**:解码器根据最后一个生成的词预测下一个词,`GeneratedInputV2()`自动取出模型预测出的概率最高的$k$个词的词向量传递给单步逻辑,`beam_search()`函数调用单步逻辑函数`gru_decoder_without_attention()`完成柱搜索并作为结果返回。 - **测试阶段**:解码器根据最后一个生成的词预测下一个词,`GeneratedInput()`自动取出模型预测出的概率最高的$k$个词的词向量传递给单步逻辑,`beam_search()`函数调用单步逻辑函数`gru_decoder_without_attention()`完成柱搜索并作为结果返回。
训练和生成的逻辑分别实现在如下的`if-else`条件分支中: 训练和生成的逻辑分别实现在如下的`if-else`条件分支中:
```python ```python
decoder_group_name = "decoder_group" decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True) group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True)
group_inputs = [group_input1] group_inputs = [group_input1]
if not generating: if not generating:
trg_embedding = paddle.layer.embedding( trg_embedding = paddle.layer.embedding(
...@@ -166,7 +168,7 @@ if not generating: ...@@ -166,7 +168,7 @@ if not generating:
return cost return cost
else: else:
trg_embedding = paddle.layer.GeneratedInputV2( trg_embedding = paddle.layer.GeneratedInput(
size=target_dict_dim, size=target_dict_dim,
embedding_name='_target_language_embedding', embedding_name='_target_language_embedding',
embedding_size=word_vector_dim) embedding_size=word_vector_dim)
......
...@@ -133,11 +133,11 @@ PaddleBook中[机器翻译](https://github.com/PaddlePaddle/book/blob/develop/08 ...@@ -133,11 +133,11 @@ PaddleBook中[机器翻译](https://github.com/PaddlePaddle/book/blob/develop/08
```python ```python
#### Decoder #### Decoder
encoder_last = paddle.layer.last_seq(input=encoded_vector) encoder_last = paddle.layer.last_seq(input=encoded_vector)
with paddle.layer.mixed( encoder_last_projected = paddle.layer.mixed(
size=decoder_size, size=decoder_size,
act=paddle.activation.Tanh()) as encoder_last_projected: act=paddle.activation.Tanh(),
encoder_last_projected += paddle.layer.full_matrix_projection( input=paddle.layer.full_matrix_projection(input=encoder_last))
input=encoder_last)
# gru step # gru step
def gru_decoder_without_attention(enc_vec, current_word): def gru_decoder_without_attention(enc_vec, current_word):
''' '''
...@@ -154,10 +154,12 @@ def gru_decoder_without_attention(enc_vec, current_word): ...@@ -154,10 +154,12 @@ def gru_decoder_without_attention(enc_vec, current_word):
context = paddle.layer.last_seq(input=enc_vec) context = paddle.layer.last_seq(input=enc_vec)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: decoder_inputs = paddle.layer.mixed(
decoder_inputs +=paddle.layer.full_matrix_projection(input=context) size=decoder_size * 3,
decoder_inputs += paddle.layer.full_matrix_projection( input=[
input=current_word) paddle.layer.full_matrix_projection(input=context),
paddle.layer.full_matrix_projection(input=current_word)
])
gru_step = paddle.layer.gru_step( gru_step = paddle.layer.gru_step(
name='gru_decoder', name='gru_decoder',
...@@ -167,24 +169,24 @@ def gru_decoder_without_attention(enc_vec, current_word): ...@@ -167,24 +169,24 @@ def gru_decoder_without_attention(enc_vec, current_word):
output_mem=decoder_mem, output_mem=decoder_mem,
size=decoder_size) size=decoder_size)
with paddle.layer.mixed( out = paddle.layer.mixed(
size=target_dict_dim, size=target_dict_dim,
bias_attr=True, bias_attr=True,
act=paddle.activation.Softmax()) as out: act=paddle.activation.Softmax(),
out += paddle.layer.full_matrix_projection(input=gru_step) input=paddle.layer.full_matrix_projection(input=gru_step))
return out return out
``` ```
在模型训练和测试阶段,解码器的行为有很大的不同: 在模型训练和测试阶段,解码器的行为有很大的不同:
- **训练阶段**:目标翻译结果的词向量`trg_embedding`作为参数传递给单步逻辑`gru_decoder_without_attention()`,函数`recurrent_group()`循环调用单步逻辑执行,最后计算目标翻译与实际解码的差异cost并返回; - **训练阶段**:目标翻译结果的词向量`trg_embedding`作为参数传递给单步逻辑`gru_decoder_without_attention()`,函数`recurrent_group()`循环调用单步逻辑执行,最后计算目标翻译与实际解码的差异cost并返回;
- **测试阶段**:解码器根据最后一个生成的词预测下一个词,`GeneratedInputV2()`自动取出模型预测出的概率最高的$k$个词的词向量传递给单步逻辑,`beam_search()`函数调用单步逻辑函数`gru_decoder_without_attention()`完成柱搜索并作为结果返回。 - **测试阶段**:解码器根据最后一个生成的词预测下一个词,`GeneratedInput()`自动取出模型预测出的概率最高的$k$个词的词向量传递给单步逻辑,`beam_search()`函数调用单步逻辑函数`gru_decoder_without_attention()`完成柱搜索并作为结果返回。
训练和生成的逻辑分别实现在如下的`if-else`条件分支中: 训练和生成的逻辑分别实现在如下的`if-else`条件分支中:
```python ```python
decoder_group_name = "decoder_group" decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True) group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True)
group_inputs = [group_input1] group_inputs = [group_input1]
if not generating: if not generating:
trg_embedding = paddle.layer.embedding( trg_embedding = paddle.layer.embedding(
...@@ -208,7 +210,7 @@ if not generating: ...@@ -208,7 +210,7 @@ if not generating:
return cost return cost
else: else:
trg_embedding = paddle.layer.GeneratedInputV2( trg_embedding = paddle.layer.GeneratedInput(
size=target_dict_dim, size=target_dict_dim,
embedding_name='_target_language_embedding', embedding_name='_target_language_embedding',
embedding_size=word_vector_dim) embedding_size=word_vector_dim)
......
...@@ -16,7 +16,7 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False): ...@@ -16,7 +16,7 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False):
''' '''
Define the network structure of NMT, including encoder and decoder. Define the network structure of NMT, including encoder and decoder.
:param source_dict_dim: size of source dictionary :param source_dict_dim: size of source dictionary
:type source_dict_dim : int :type source_dict_dim : int
:param target_dict_dim: size of target dictionary :param target_dict_dim: size of target dictionary
:type target_dict_dim: int :type target_dict_dim: int
...@@ -41,11 +41,11 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False): ...@@ -41,11 +41,11 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False):
return_seq=True) return_seq=True)
#### Decoder #### Decoder
encoder_last = paddle.layer.last_seq(input=encoded_vector) encoder_last = paddle.layer.last_seq(input=encoded_vector)
with paddle.layer.mixed( encoder_last_projected = paddle.layer.mixed(
size=decoder_size, size=decoder_size,
act=paddle.activation.Tanh()) as encoder_last_projected: act=paddle.activation.Tanh(),
encoder_last_projected += paddle.layer.full_matrix_projection( input=paddle.layer.full_matrix_projection(input=encoder_last))
input=encoder_last)
# gru step # gru step
def gru_decoder_without_attention(enc_vec, current_word): def gru_decoder_without_attention(enc_vec, current_word):
''' '''
...@@ -63,10 +63,12 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False): ...@@ -63,10 +63,12 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False):
context = paddle.layer.last_seq(input=enc_vec) context = paddle.layer.last_seq(input=enc_vec)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: decoder_inputs = paddle.layer.mixed(
decoder_inputs += paddle.layer.full_matrix_projection(input=context) size=decoder_size * 3,
decoder_inputs += paddle.layer.full_matrix_projection( input=[
input=current_word) paddle.layer.full_matrix_projection(input=context),
paddle.layer.full_matrix_projection(input=current_word)
])
gru_step = paddle.layer.gru_step( gru_step = paddle.layer.gru_step(
name='gru_decoder', name='gru_decoder',
...@@ -76,15 +78,15 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False): ...@@ -76,15 +78,15 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False):
output_mem=decoder_mem, output_mem=decoder_mem,
size=decoder_size) size=decoder_size)
with paddle.layer.mixed( out = paddle.layer.mixed(
size=target_dict_dim, size=target_dict_dim,
bias_attr=True, bias_attr=True,
act=paddle.activation.Softmax()) as out: act=paddle.activation.Softmax(),
out += paddle.layer.full_matrix_projection(input=gru_step) input=paddle.layer.full_matrix_projection(input=gru_step))
return out return out
decoder_group_name = "decoder_group" decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True) group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True)
group_inputs = [group_input1] group_inputs = [group_input1]
if not generating: if not generating:
...@@ -109,7 +111,7 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False): ...@@ -109,7 +111,7 @@ def seq2seq_net(source_dict_dim, target_dict_dim, generating=False):
return cost return cost
else: else:
trg_embedding = paddle.layer.GeneratedInputV2( trg_embedding = paddle.layer.GeneratedInput(
size=target_dict_dim, size=target_dict_dim,
embedding_name='_target_language_embedding', embedding_name='_target_language_embedding',
embedding_size=word_vector_dim) embedding_size=word_vector_dim)
...@@ -194,7 +196,7 @@ def generate(source_dict_dim, target_dict_dim, init_models_path): ...@@ -194,7 +196,7 @@ def generate(source_dict_dim, target_dict_dim, init_models_path):
beam_gen = seq2seq_net(source_dict_dim, target_dict_dim, True) beam_gen = seq2seq_net(source_dict_dim, target_dict_dim, True)
with gzip.open(init_models_path) as f: with gzip.open(init_models_path) as f:
parameters = paddle.parameters.Parameters.from_tar(f) parameters = paddle.parameters.Parameters.from_tar(f)
# prob is the prediction probabilities, and id is the prediction word. # prob is the prediction probabilities, and id is the prediction word.
beam_result = paddle.infer( beam_result = paddle.infer(
output_layer=beam_gen, output_layer=beam_gen,
parameters=parameters, parameters=parameters,
...@@ -244,10 +246,10 @@ def main(): ...@@ -244,10 +246,10 @@ def main():
target_language_dict_dim = 30000 target_language_dict_dim = 30000
if generating: if generating:
# should pass the right generated model's path here # modify this path to specify a trained model.
init_models_path = 'models/nmt_without_att_params_batch_1800.tar.gz' init_models_path = 'models/nmt_without_att_params_batch_1800.tar.gz'
if not os.path.exists(init_models_path): if not os.path.exists(init_models_path):
print "Cannot find models for generation" print "trained model cannot be found."
exit(1) exit(1)
generate(source_language_dict_dim, target_language_dict_dim, generate(source_language_dict_dim, target_language_dict_dim,
init_models_path) init_models_path)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册