# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers

from .rec_seq_encoder import SequenceEncoder


class AttentionPredict(object):
    def __init__(self, params):
        super(AttentionPredict, self).__init__()
        self.char_num = params['char_num']
        self.encoder = SequenceEncoder(params)
        self.decoder_size = params['Attention']['decoder_size']
        self.word_vector_dim = params['Attention']['word_vector_dim']
        self.encoder_type = params['encoder_type']
        self.max_length = params['max_text_length']

    def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
                         decoder_size):
        # Additive attention: project the decoder state, add it to the
        # projected encoder sequence, and pool a weighted context vector.
        decoder_state_proj = layers.fc(input=decoder_state,
                                       size=decoder_size,
                                       bias_attr=False,
                                       name="decoder_state_proj_fc")
        decoder_state_expand = layers.sequence_expand(
            x=decoder_state_proj, y=encoder_proj)
        concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
        concated = layers.tanh(x=concated)
        attention_weights = layers.fc(input=concated,
                                      size=1,
                                      act=None,
                                      bias_attr=False,
                                      name="attention_weights_fc")
        attention_weights = layers.sequence_softmax(input=attention_weights)
        weights_reshape = layers.reshape(x=attention_weights, shape=[-1])
        scaled = layers.elementwise_mul(
            x=encoder_vec, y=weights_reshape, axis=0)
        context = layers.sequence_pool(input=scaled, pool_type='sum')
        return context

    def gru_decoder_with_attention(self, target_embedding, encoder_vec,
                                   encoder_proj, decoder_boot, decoder_size,
                                   char_num):
        # Teacher-forced GRU decoder used at training time.
        rnn = layers.DynamicRNN()

        with rnn.block():
            current_word = rnn.step_input(target_embedding)
            encoder_vec = rnn.static_input(encoder_vec)
            encoder_proj = rnn.static_input(encoder_proj)
            hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
            context = self.simple_attention(encoder_vec, encoder_proj,
                                            hidden_mem, decoder_size)
            fc_1 = layers.fc(input=context,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc1")
            fc_2 = layers.fc(input=current_word,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc2")
            decoder_inputs = fc_1 + fc_2
            h, _, _ = layers.gru_unit(
                input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3)
            rnn.update_memory(hidden_mem, h)
            out = layers.fc(input=h,
                            size=char_num,
                            bias_attr=True,
                            act='softmax',
                            name="rnn_out_fc")
            rnn.output(out)
        return rnn()

    def gru_attention_infer(self, decoder_boot, max_length, char_num,
                            word_vector_dim, encoded_vector, encoded_proj,
                            decoder_size):
        # Greedy (beam_size = 1) step-by-step decoding used at inference time.
        init_state = decoder_boot
        beam_size = 1
        array_len = layers.fill_constant(
            shape=[1], dtype='int64', value=max_length)
        counter = layers.zeros(shape=[1], dtype='int64', force_cpu=True)

        # fill the first element with init_state
        state_array = layers.create_array('float32')
        layers.array_write(init_state, array=state_array, i=counter)

        # ids, scores as memory
        ids_array = layers.create_array('int64')
        scores_array = layers.create_array('float32')

        rois_shape = layers.shape(init_state)
        batch_size = layers.slice(
            rois_shape, axes=[0], starts=[0], ends=[1]) + 1
        lod_level = layers.range(
            start=0, end=batch_size, step=1, dtype=batch_size.dtype)

        init_ids = layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], value=0, dtype='int64')
        init_ids = layers.lod_reset(init_ids, lod_level)
        init_ids = layers.lod_append(init_ids, lod_level)
        init_scores = layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], value=1, dtype='float32')
        init_scores = layers.lod_reset(init_scores, init_ids)
        layers.array_write(init_ids, array=ids_array, i=counter)
        layers.array_write(init_scores, array=scores_array, i=counter)

        full_ids = fluid.layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], dtype='int64', value=1)
        full_scores = fluid.layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], dtype='float32', value=1)

        cond = layers.less_than(x=counter, y=array_len)
        while_op = layers.While(cond=cond)
        with while_op.block():
            pre_ids = layers.array_read(array=ids_array, i=counter)
            pre_state = layers.array_read(array=state_array, i=counter)
            pre_score = layers.array_read(array=scores_array, i=counter)
            pre_ids_emb = layers.embedding(
                input=pre_ids,
                size=[char_num, word_vector_dim],
                dtype='float32')

            context = self.simple_attention(encoded_vector, encoded_proj,
                                            pre_state, decoder_size)

            # expand the recursive_sequence_lengths of pre_state
            # to be the same with pre_score
            pre_state_expanded = layers.sequence_expand(pre_state, pre_score)
            context_expanded = layers.sequence_expand(context, pre_score)

            fc_1 = layers.fc(input=context_expanded,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc1")
            fc_2 = layers.fc(input=pre_ids_emb,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc2")
            decoder_inputs = fc_1 + fc_2

            current_state, _, _ = layers.gru_unit(
                input=decoder_inputs,
                hidden=pre_state_expanded,
                size=decoder_size * 3)
            current_state_with_lod = layers.lod_reset(
                x=current_state, y=pre_score)

            # use score to do beam search
            current_score = layers.fc(input=current_state_with_lod,
                                      size=char_num,
                                      bias_attr=True,
                                      act='softmax',
                                      name="rnn_out_fc")
            topk_scores, topk_indices = layers.topk(current_score, k=beam_size)

            new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
            fluid.layers.assign(new_ids, full_ids)
            new_scores = fluid.layers.concat([full_scores, topk_scores], axis=1)
            fluid.layers.assign(new_scores, full_scores)

            layers.increment(x=counter, value=1, in_place=True)

            # update the memories
            layers.array_write(current_state, array=state_array, i=counter)
            layers.array_write(topk_indices, array=ids_array, i=counter)
            layers.array_write(topk_scores, array=scores_array, i=counter)

            # update the break condition:
            # up to the max length or all candidates of
            # source sentences have ended.
            length_cond = layers.less_than(x=counter, y=array_len)
            finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)
        return full_ids, full_scores

    def __call__(self, inputs, labels=None, mode=None):
        encoder_features = self.encoder(inputs)
        char_num = self.char_num
        word_vector_dim = self.word_vector_dim
        decoder_size = self.decoder_size

        if self.encoder_type == "reshape":
            encoder_input = encoder_features
            encoded_vector = encoder_features
        else:
            encoder_input = encoder_features[1]
            encoded_vector = layers.concat(encoder_features, axis=1)
        encoded_proj = layers.fc(input=encoded_vector,
                                 size=decoder_size,
                                 bias_attr=False,
                                 name="encoded_proj_fc")
        backward_first = layers.sequence_pool(
            input=encoder_input, pool_type='first')
        decoder_boot = layers.fc(input=backward_first,
                                 size=decoder_size,
                                 bias_attr=False,
                                 act="relu",
                                 name='decoder_boot')

        if mode == "train":
            label_in = labels['label_in']
            label_out = labels['label_out']
            label_in = layers.cast(x=label_in, dtype='int64')
            trg_embedding = layers.embedding(
                input=label_in,
                size=[char_num, word_vector_dim],
                dtype='float32')
            predict = self.gru_decoder_with_attention(
                trg_embedding, encoded_vector, encoded_proj, decoder_boot,
                decoder_size, char_num)
            _, decoded_out = layers.topk(input=predict, k=1)
            decoded_out = layers.lod_reset(decoded_out, y=label_out)
            predicts = {'predict': predict, 'decoded_out': decoded_out}
        else:
            ids, predict = self.gru_attention_infer(
                decoder_boot, self.max_length, char_num, word_vector_dim,
                encoded_vector, encoded_proj, decoder_size)
            predicts = {'predict': predict, 'decoded_out': ids}
        return predicts
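

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not executed as part of this module).
# The keys below are the ones read in AttentionPredict.__init__ above; the
# concrete values are assumptions, and SequenceEncoder may require additional
# configuration keys that are not shown here.
#
# params = {
#     'char_num': 38,             # size of the character set (assumed value)
#     'encoder_type': 'rnn',      # anything other than 'reshape' concatenates
#                                 # the encoder feature list along axis 1
#     'max_text_length': 25,      # assumed decoding length limit
#     'Attention': {
#         'decoder_size': 96,     # assumed GRU decoder hidden size
#         'word_vector_dim': 96,  # assumed character embedding size
#     },
# }
# head = AttentionPredict(params)
# # mode == "train" expects labels with 'label_in' / 'label_out' entries;
# # any other mode runs greedy (beam_size = 1) inference. Both branches
# # return a dict with 'predict' and 'decoded_out'.
# predicts = head(inputs, labels=labels, mode="train")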