Commit 1ce74564 authored by S slf12

fix simple attention test=develop

Parent feaa38c3
@@ -133,22 +133,27 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
         decoder_state_proj = fluid.layers.fc(input=decoder_state,
                                              size=decoder_size,
                                              bias_attr=False)
-        decoder_state_expand = fluid.layers.sequence_expand(
-            x=decoder_state_proj, y=encoder_proj)
-        #concated = encoder_proj + decoder_state_expand
+        decoder_state_proj = fluid.layers.unsqueeze(
+            decoder_state_proj, axes=[1])
+        decoder_state_expand = fluid.layers.expand(
+            decoder_state_proj, [1, encoder_proj.shape[1], 1])
         concated = fluid.layers.elementwise_add(encoder_proj,
                                                 decoder_state_expand)
         concated = fluid.layers.tanh(x=concated)
         attention_weights = fluid.layers.fc(input=concated,
                                             size=1,
                                             act=None,
-                                            bias_attr=False)
-        attention_weights = fluid.layers.sequence_softmax(
-            input=attention_weights)
-        weigths_reshape = fluid.layers.reshape(x=attention_weights, shape=[-1])
+                                            bias_attr=False,
+                                            num_flatten_dims=2)
+        attention_weights = fluid.layers.softmax(input=attention_weights)
         scaled = fluid.layers.elementwise_mul(
-            x=encoder_vec, y=weigths_reshape, axis=0)
-        context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
+            x=encoder_vec, y=attention_weights, axis=0)
+        scaled = fluid.layers.unsqueeze(scaled, axes=[3])
+        context = fluid.layers.pool2d(
+            input=scaled,
+            pool_type='avg',
+            pool_size=[scaled.shape[-2], scaled.shape[-1]])
+        context = fluid.layers.squeeze(context, axes=[2, 3])
         return context
 
     pad_value = fluid.layers.assign(np.array([0.0], dtype=np.float32))
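Context for the hunk above: the change swaps the LoD-based sequence ops (sequence_expand, sequence_softmax, sequence_pool) for ordinary dense-tensor ops, so the attention runs on padded fixed-shape batches. Below is a hedged, self-contained sketch of the resulting attention with assumed shapes (batch N, padded length T, encoder width H, decoder width D) annotated per step. For readability it normalizes the scores with squeeze + softmax over the time axis and reduces with reduce_sum, whereas the committed code keeps a trailing size-1 axis and reduces via unsqueeze/pool2d/squeeze; treat it as an illustration, not the committed implementation.

import paddle.fluid as fluid

def padded_simple_attention(encoder_vec, encoder_proj, decoder_state,
                            decoder_size):
    # encoder_vec:   [N, T, H]  padded encoder states (assumed shapes)
    # encoder_proj:  [N, T, D]  projected encoder states
    # decoder_state: [N, D]     current decoder hidden state
    state_proj = fluid.layers.fc(input=decoder_state,
                                 size=decoder_size,
                                 bias_attr=False)                 # [N, D]
    state_proj = fluid.layers.unsqueeze(state_proj, axes=[1])     # [N, 1, D]
    state_expand = fluid.layers.expand(
        state_proj, [1, encoder_proj.shape[1], 1])                # [N, T, D]

    # Additive (Bahdanau-style) scoring: one scalar per time step.
    mixed = fluid.layers.tanh(
        fluid.layers.elementwise_add(encoder_proj, state_expand))
    scores = fluid.layers.fc(input=mixed, size=1, bias_attr=False,
                             num_flatten_dims=2)                  # [N, T, 1]

    # Normalize over the time axis T.
    scores = fluid.layers.squeeze(scores, axes=[2])               # [N, T]
    weights = fluid.layers.softmax(scores)                        # [N, T]

    # Weighted sum of encoder states over time.
    scaled = fluid.layers.elementwise_mul(
        x=encoder_vec, y=weights, axis=0)                         # [N, T, H]
    context = fluid.layers.reduce_sum(scaled, dim=1)              # [N, H]
    return context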
@@ -159,12 +164,17 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
     target_embedding_pad = fluid.layers.transpose(target_embedding_pad,
                                                   [1, 0, 2])
 
+    encoder_vec_pad, _ = fluid.layers.sequence_pad(
+        encoder_vec, pad_value, maxlen=48)
+    encoder_proj_pad, _ = fluid.layers.sequence_pad(
+        encoder_proj, pad_value, maxlen=48)
 
     rnn = fluid.layers.StaticRNN()
     with rnn.step():
         current_word = rnn.step_input(target_embedding_pad)
         hidden_mem = rnn.memory(init=decoder_boot)
-        context = simple_attention(encoder_vec, encoder_proj, hidden_mem)
+        context = simple_attention(encoder_vec_pad, encoder_proj_pad,
+                                   hidden_mem)
         fc_1 = fluid.layers.fc(input=context,
                                size=decoder_size * 3,
                                bias_attr=False)
...
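Context for the second hunk: StaticRNN and the rewritten dense attention consume fixed-shape tensors, so the variable-length (LoD) encoder outputs are padded up front with sequence_pad. A minimal sketch of sequence_pad in isolation, assuming old-style fluid 1.x graph construction; the input name and the toy feature width of 8 are assumptions, while maxlen=48 mirrors the diff:

import numpy as np
import paddle.fluid as fluid

# A variable-length (LoD level 1) sequence input; width 8 is a toy value.
seq = fluid.layers.data(name='seq', shape=[8], dtype='float32', lod_level=1)
pad_value = fluid.layers.assign(np.array([0.0], dtype=np.float32))

# sequence_pad returns the dense padded tensor plus each original length.
padded, lengths = fluid.layers.sequence_pad(seq, pad_value, maxlen=48)
# padded:  [N, 48, 8]  -- every sequence padded out to 48 steps
#                         (maxlen must cover the longest sequence)
# lengths: [N]         -- true length of each sequence before padding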