Commit 1b223c30 authored by slf12

use new seq2seq api test=develop

Parent ed1cb0ce
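The substance of the change: the fused `dynamic_gru` op is replaced by the cell-based seq2seq API, where variable-length LoDTensors are padded into dense batch tensors, a `GRUCell` is driven step by step by `fluid.layers.rnn`, and the result is unpadded back into a LoDTensor. A minimal sketch of the forward-direction migration, assuming a Paddle 1.6-era `fluid` install (variable names here are illustrative, not from the repository):

```python
import numpy as np
import paddle.fluid as fluid

hidden = 200
# A variable-length input sequence, like the encoder's sliced CNN features.
x = fluid.data(name='x', shape=[None, 128], dtype='float32', lod_level=1)

# Old API: dynamic_gru consumes the LoDTensor directly; the fused GRU op
# expects an input of width 3 * hidden.
fc_old = fluid.layers.fc(input=x, size=hidden * 3)
gru_old = fluid.layers.dynamic_gru(input=fc_old, size=hidden,
                                   candidate_activation='relu')

# New API: pad to a dense [batch, max_len, dim] tensor, drive a GRUCell with
# fluid.layers.rnn, then unpad back to a LoDTensor for downstream sequence ops.
pad_value = fluid.layers.assign(np.array([0.0], dtype=np.float32))
x_pad, length = fluid.layers.sequence_pad(x, pad_value)
fc_new = fluid.layers.fc(input=x_pad, size=hidden * 3, num_flatten_dims=2)
cell_fw = fluid.layers.GRUCell(hidden_size=hidden, activation=fluid.layers.relu)
gru_fw, _ = fluid.layers.rnn(cell_fw, inputs=fc_new)
length = fluid.layers.reshape(length, [-1])
gru_fw = fluid.layers.sequence_unpad(x=gru_fw, length=length)
```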
@@ -16,6 +16,7 @@ from __future__ import division
 from __future__ import print_function
 import paddle.fluid as fluid
 import six
+import numpy as np

 decoder_size = 128
 word_vector_dim = 128
@@ -24,6 +25,7 @@ sos = 0
 eos = 1
 beam_size = 1

 def conv_bn_pool(input,
                  group,
                  out_ch,
@@ -79,32 +81,42 @@ def encoder_net(images, rnn_hidden_size=200, is_test=False, use_cudnn=True):
         stride=[1, 1],
         filter_size=[conv_features.shape[2], 1])
+    pad_value = fluid.layers.assign(input=np.array([0.0], dtype=np.float32))
+    sliced_feature_pad, output_len = fluid.layers.sequence_pad(
+        sliced_feature, pad_value, maxlen=48)
     para_attr = fluid.ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.02))
     bias_attr = fluid.ParamAttr(
         initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0)
-    fc_1 = fluid.layers.fc(input=sliced_feature,
+    fc_1 = fluid.layers.fc(input=sliced_feature_pad,
                            size=rnn_hidden_size * 3,
                            param_attr=para_attr,
-                           bias_attr=False)
-    fc_2 = fluid.layers.fc(input=sliced_feature,
+                           bias_attr=False,
+                           num_flatten_dims=2)
+    fc_2 = fluid.layers.fc(input=sliced_feature_pad,
                            size=rnn_hidden_size * 3,
                            param_attr=para_attr,
-                           bias_attr=False)
+                           bias_attr=False,
+                           num_flatten_dims=2)
-    gru_forward = fluid.layers.dynamic_gru(
-        input=fc_1,
-        size=rnn_hidden_size,
+    gru_cell_forward = fluid.layers.GRUCell(
+        hidden_size=rnn_hidden_size,
         param_attr=para_attr,
         bias_attr=bias_attr,
-        candidate_activation='relu')
-    gru_backward = fluid.layers.dynamic_gru(
-        input=fc_2,
-        size=rnn_hidden_size,
-        is_reverse=True,
+        activation=fluid.layers.relu)
+    gru_cell_backward = fluid.layers.GRUCell(
+        hidden_size=rnn_hidden_size,
         param_attr=para_attr,
         bias_attr=bias_attr,
-        candidate_activation='relu')
+        activation=fluid.layers.relu)
+    gru_forward, _ = fluid.layers.rnn(gru_cell_forward, inputs=fc_1)
+    gru_backward, _ = fluid.layers.rnn(gru_cell_backward,
+                                       inputs=fc_2,
+                                       is_reverse=True)
+    output_len = fluid.layers.reshape(output_len, [-1])
+    gru_forward = fluid.layers.sequence_unpad(x=gru_forward, length=output_len)
+    gru_backward = fluid.layers.sequence_unpad(
+        x=gru_backward, length=output_len)
     encoded_vector = fluid.layers.concat(
         input=[gru_forward, gru_backward], axis=1)
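The backward direction follows the same pattern; the one behavioral difference is that `is_reverse=True` moves from the GRU op itself to the `fluid.layers.rnn` driver. Continuing the sketch above (same assumptions and names):

```python
# Backward cell: reversal is now handled by the rnn() driver, not the op.
cell_bw = fluid.layers.GRUCell(hidden_size=hidden, activation=fluid.layers.relu)
gru_bw, _ = fluid.layers.rnn(cell_bw, inputs=fc_new, is_reverse=True)
gru_bw = fluid.layers.sequence_unpad(x=gru_bw, length=length)

# Both directions are unpadded back to LoDTensors before the concat that the
# rest of the (unchanged) model expects.
bi_gru = fluid.layers.concat(input=[gru_fw, gru_bw], axis=1)
```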
@@ -123,7 +135,9 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
                                bias_attr=False)
         decoder_state_expand = fluid.layers.sequence_expand(
             x=decoder_state_proj, y=encoder_proj)
-        concated = encoder_proj + decoder_state_expand
+        #concated = encoder_proj + decoder_state_expand
+        concated = fluid.layers.elementwise_add(encoder_proj,
+                                                decoder_state_expand)
         concated = fluid.layers.tanh(x=concated)
         attention_weights = fluid.layers.fc(input=concated,
                                             size=1,
@@ -137,13 +151,19 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
         context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
         return context

-    rnn = fluid.layers.DynamicRNN()
+    pad_value = fluid.layers.assign(np.array([0.0], dtype=np.float32))
+    target_embedding_pad, target_embedding_length = fluid.layers.sequence_pad(
+        target_embedding, pad_value)
+    target_embedding_length = fluid.layers.reshape(target_embedding_length,
+                                                   [-1])
+    target_embedding_pad = fluid.layers.transpose(target_embedding_pad,
+                                                  [1, 0, 2])
+    rnn = fluid.layers.StaticRNN()

-    with rnn.block():
-        current_word = rnn.step_input(target_embedding)
-        encoder_vec = rnn.static_input(encoder_vec)
-        encoder_proj = rnn.static_input(encoder_proj)
-        hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
+    with rnn.step():
+        current_word = rnn.step_input(target_embedding_pad)
+        hidden_mem = rnn.memory(init=decoder_boot)
         context = simple_attention(encoder_vec, encoder_proj, hidden_mem)
         fc_1 = fluid.layers.fc(input=context,
                                size=decoder_size * 3,
@@ -159,8 +179,13 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
                               size=num_classes + 2,
                               bias_attr=True,
                               act='softmax')
-        rnn.output(out)
-    return rnn()
+        rnn.step_output(out)
+    rnn_out = rnn()
+    rnn_out = fluid.layers.transpose(rnn_out, [1, 0, 2])
+    rnn_out = fluid.layers.sequence_unpad(
+        x=rnn_out, length=target_embedding_length)
+    return rnn_out

 def attention_train_net(args, data_shape, num_classes):
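On the decoder side, `DynamicRNN` gives way to `StaticRNN`, which unrolls over the leading (time) axis of a dense tensor; that is why the hunks above pad the target embeddings, transpose them to time-major, unroll, then transpose back and unpad. A condensed, hedged sketch of the pattern, with toy inputs and a plain fc state update (the commit's real step body also computes attention and a GRU unit):

```python
import numpy as np
import paddle.fluid as fluid

decoder_size = 128
# Toy stand-ins for trg_embedding and decoder_boot in the commit.
trg = fluid.data(name='trg', shape=[None, decoder_size], dtype='float32',
                 lod_level=1)
boot = fluid.data(name='boot', shape=[None, decoder_size], dtype='float32')

pad_value = fluid.layers.assign(np.array([0.0], dtype=np.float32))
# maxlen keeps the unroll length static, which StaticRNN needs at build time
# (assumes no target sequence is longer than 16 steps).
trg_pad, trg_len = fluid.layers.sequence_pad(trg, pad_value, maxlen=16)
trg_len = fluid.layers.reshape(trg_len, [-1])
trg_pad = fluid.layers.transpose(trg_pad, [1, 0, 2])  # -> [max_len, batch, dim]

rnn = fluid.layers.StaticRNN()
with rnn.step():
    word = rnn.step_input(trg_pad)        # one time step: [batch, dim]
    prev = rnn.memory(init=boot)          # state carried across steps
    h = fluid.layers.fc(input=[word, prev], size=decoder_size, act='tanh')
    rnn.update_memory(prev, h)            # feed the new state forward
    rnn.step_output(h)

out = rnn()                               # time-major: [max_len, batch, dim]
out = fluid.layers.transpose(out, [1, 0, 2])   # back to batch-major
out = fluid.layers.sequence_unpad(x=out, length=trg_len)
```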
@@ -188,7 +213,8 @@ def attention_train_net(args, data_shape, num_classes):
     prediction = gru_decoder_with_attention(trg_embedding, encoded_vector,
                                             encoded_proj, decoder_boot,
                                             decoder_size, num_classes)
-    fluid.clip.set_gradient_clip(fluid.clip.GradientClipByValue(args.gradient_clip))
+    fluid.clip.set_gradient_clip(
+        fluid.clip.GradientClipByValue(args.gradient_clip))
     label_out = fluid.layers.cast(x=label_out, dtype='int64')
     _, maxid = fluid.layers.topk(input=prediction, k=1)
...