From 1ce74564bdf56e21ff311badcfe49df4e8bf65f6 Mon Sep 17 00:00:00 2001
From: slf12
Date: Tue, 5 Nov 2019 02:56:02 +0000
Subject: [PATCH] fix simple attention test=develop

---
 PaddleCV/ocr_recognition/attention_model.py | 30 ++++++++++++++-------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/PaddleCV/ocr_recognition/attention_model.py b/PaddleCV/ocr_recognition/attention_model.py
index 76e0d604..6b285d2e 100755
--- a/PaddleCV/ocr_recognition/attention_model.py
+++ b/PaddleCV/ocr_recognition/attention_model.py
@@ -133,22 +133,27 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
         decoder_state_proj = fluid.layers.fc(input=decoder_state,
                                              size=decoder_size,
                                              bias_attr=False)
-        decoder_state_expand = fluid.layers.sequence_expand(
-            x=decoder_state_proj, y=encoder_proj)
-        #concated = encoder_proj + decoder_state_expand
+        decoder_state_proj = fluid.layers.unsqueeze(
+            decoder_state_proj, axes=[1])
+        decoder_state_expand = fluid.layers.expand(
+            decoder_state_proj, [1, encoder_proj.shape[1], 1])
         concated = fluid.layers.elementwise_add(encoder_proj,
                                                 decoder_state_expand)
         concated = fluid.layers.tanh(x=concated)
         attention_weights = fluid.layers.fc(input=concated,
                                             size=1,
                                             act=None,
-                                            bias_attr=False)
-        attention_weights = fluid.layers.sequence_softmax(
-            input=attention_weights)
-        weigths_reshape = fluid.layers.reshape(x=attention_weights, shape=[-1])
+                                            bias_attr=False,
+                                            num_flatten_dims=2)
+        attention_weights = fluid.layers.softmax(input=attention_weights)
         scaled = fluid.layers.elementwise_mul(
-            x=encoder_vec, y=weigths_reshape, axis=0)
-        context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
+            x=encoder_vec, y=attention_weights, axis=0)
+        scaled = fluid.layers.unsqueeze(scaled, axes=[3])
+        context = fluid.layers.pool2d(
+            input=scaled,
+            pool_type='avg',
+            pool_size=[scaled.shape[-2], scaled.shape[-1]])
+        context = fluid.layers.squeeze(context, axes=[2, 3])
         return context
 
     pad_value = fluid.layers.assign(np.array([0.0], dtype=np.float32))
@@ -159,12 +164,17 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
     target_embedding_pad = fluid.layers.transpose(target_embedding_pad,
                                                   [1, 0, 2])
 
+    encoder_vec_pad, _ = fluid.layers.sequence_pad(
+        encoder_vec, pad_value, maxlen=48)
+    encoder_proj_pad, _ = fluid.layers.sequence_pad(
+        encoder_proj, pad_value, maxlen=48)
     rnn = fluid.layers.StaticRNN()
 
     with rnn.step():
         current_word = rnn.step_input(target_embedding_pad)
         hidden_mem = rnn.memory(init=decoder_boot)
-        context = simple_attention(encoder_vec, encoder_proj, hidden_mem)
+        context = simple_attention(encoder_vec_pad, encoder_proj_pad,
+                                   hidden_mem)
         fc_1 = fluid.layers.fc(input=context,
                                size=decoder_size * 3,
                                bias_attr=False)
--
GitLab
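
Note (not part of the patch): the change above reworks `simple_attention` from LoD sequence ops (`sequence_expand`, `sequence_softmax`, `sequence_pool`) to dense ops over padded `[batch, T, hidden]` tensors. Below is a minimal NumPy sketch of the additive-attention math the new code expresses; the shapes, weight names, and random initialization are made up for illustration and are not taken from the repository.

```python
# Additive (Bahdanau-style) attention over padded tensors, as a plain NumPy
# sketch. Assumed shapes: batch size 2, 48 time steps (the maxlen used by
# sequence_pad in the patch), illustrative feature sizes.
import numpy as np


def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)


batch, T, enc_size, dec_size = 2, 48, 200, 128        # assumed sizes

encoder_vec = np.random.randn(batch, T, enc_size)     # padded encoder features
encoder_proj = np.random.randn(batch, T, dec_size)    # precomputed fc(encoder_vec)
decoder_state = np.random.randn(batch, dec_size)      # current GRU hidden state

W_s = np.random.randn(dec_size, dec_size)             # decoder-state projection (fc, no bias)
v = np.random.randn(dec_size, 1)                      # size-1 scoring fc, no bias

# Project the decoder state and broadcast it across the time axis,
# i.e. the unsqueeze(axes=[1]) + expand([1, T, 1]) pair in the patch.
state_proj = (decoder_state @ W_s)[:, None, :]        # [batch, 1, dec_size]

# tanh(encoder_proj + state_proj), then a size-1 fc gives one score per
# time step; softmax normalizes the scores over the T axis.
scores = np.tanh(encoder_proj + state_proj) @ v       # [batch, T, 1]
weights = softmax(scores, axis=1)                     # [batch, T, 1]

# Context vector: attention-weighted sum of encoder features over time,
# which is what sequence_pool(pool_type='sum') computed in the LoD version.
context = (encoder_vec * weights).sum(axis=1)         # [batch, enc_size]
print(context.shape)
```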