# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.fluid as fluid


class DecAttNet():
    """Decomposable attention network."""

    def __init__(self, config):
        self._config = config
        self.initializer = fluid.initializer.Xavier(uniform=False)

    def __call__(self, seq1, seq2, mask1, mask2, label):
        return self.body(seq1, seq2, mask1, mask2, label)

    def body(self, seq1, seq2, mask1, mask2, label):
        """Body function: embed, attend, compare, aggregate, and predict."""
        transformed_q1 = self.transformation(seq1)
        transformed_q2 = self.transformation(seq2)
        masked_q1 = self.apply_mask(transformed_q1, mask1)
        masked_q2 = self.apply_mask(transformed_q2, mask2)
        alpha, beta = self.attend(masked_q1, masked_q2)
        if self._config.share_wight_btw_seq:
            seq1_compare = self.compare(masked_q1, beta, param_prefix='compare')
            seq2_compare = self.compare(masked_q2, alpha, param_prefix='compare')
        else:
            seq1_compare = self.compare(masked_q1, beta, param_prefix='compare_1')
            seq2_compare = self.compare(masked_q2, alpha, param_prefix='compare_2')
        aggregate_res = self.aggregate(seq1_compare, seq2_compare)
        prediction = fluid.layers.fc(aggregate_res,
                                     size=self._config.class_dim,
                                     act='softmax')
        loss = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_cost = fluid.layers.mean(x=loss)
        acc = fluid.layers.accuracy(input=prediction, label=label)
        return avg_cost, acc, prediction

    def apply_mask(self, seq, mask):
        """
        Apply mask on seq (zero out padded positions).
        Input: seq in shape [batch_size, seq_len, embedding_size]
        Input: mask in shape [batch_size, seq_len]
        Output: masked seq in shape [batch_size, seq_len, embedding_size]
        """
        return fluid.layers.elementwise_mul(x=seq, y=mask, axis=0)

    def feed_forward_2d(self, vec, param_prefix):
        """
        Two-layer ReLU feed-forward network applied position-wise.
        Input: vec in shape [batch_size, seq_len, vec_dim]
        Output: fc2 in shape [batch_size, seq_len, num_units[1]]
        """
        fc1 = fluid.layers.fc(
            vec,
            size=self._config.num_units[0],
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                name=param_prefix + '_fc1.w', initializer=self.initializer),
            bias_attr=param_prefix + '_fc1.b',
            act='relu')
        fc1 = fluid.layers.dropout(fc1, dropout_prob=self._config.droprate)
        fc2 = fluid.layers.fc(
            fc1,
            size=self._config.num_units[1],
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                name=param_prefix + '_fc2.w', initializer=self.initializer),
            bias_attr=param_prefix + '_fc2.b',
            act='relu')
        fc2 = fluid.layers.dropout(fc2, dropout_prob=self._config.droprate)
        return fc2

    def feed_forward(self, vec, param_prefix):
        """
        Two-layer ReLU feed-forward network on a flat vector.
        Input: vec in shape [batch_size, vec_dim]
        Output: fc2 in shape [batch_size, num_units[1]]
        """
        fc1 = fluid.layers.fc(
            vec,
            size=self._config.num_units[0],
            num_flatten_dims=1,
            param_attr=fluid.ParamAttr(
                name=param_prefix + '_fc1.w', initializer=self.initializer),
            bias_attr=param_prefix + '_fc1.b',
            act='relu')
        fc1 = fluid.layers.dropout(fc1, dropout_prob=self._config.droprate)
        fc2 = fluid.layers.fc(
            fc1,
            size=self._config.num_units[1],
            num_flatten_dims=1,
            param_attr=fluid.ParamAttr(
                name=param_prefix + '_fc2.w', initializer=self.initializer),
            bias_attr=param_prefix + '_fc2.b',
            act='relu')
        fc2 = fluid.layers.dropout(fc2, dropout_prob=self._config.droprate)
        return fc2

    def transformation(self, seq):
        """Embed token ids and optionally project them to proj_emb_dim."""
        embed = fluid.layers.embedding(
            input=seq,
            size=[self._config.dict_dim, self._config.emb_dim],
            param_attr=fluid.ParamAttr(
                name='emb.w',
                trainable=self._config.word_embedding_trainable))
        if self._config.proj_emb_dim is not None:
            return fluid.layers.fc(
                embed,
                size=self._config.proj_emb_dim,
                num_flatten_dims=2,
                param_attr=fluid.ParamAttr(
                    name='project' + '_fc1.w', initializer=self.initializer),
                bias_attr=False,
                act=None)
        return embed

    def attend(self, seq1, seq2):
        """
        Soft-align the two sequences against each other.
        Input: seq1, shape [batch_size, seq_len1, embed_size]
        Input: seq2, shape [batch_size, seq_len2, embed_size]
        Output: alpha, shape [batch_size, seq_len2, embed_size]
        Output: beta, shape [batch_size, seq_len1, embed_size]
        """
        if self._config.share_wight_btw_seq:
            seq1 = self.feed_forward_2d(seq1, param_prefix="attend")
            seq2 = self.feed_forward_2d(seq2, param_prefix="attend")
        else:
            seq1 = self.feed_forward_2d(seq1, param_prefix="attend_1")
            seq2 = self.feed_forward_2d(seq2, param_prefix="attend_2")
        # Unnormalized alignment scores, shape [batch_size, seq_len1, seq_len2].
        attention_weight = fluid.layers.matmul(seq1, seq2, transpose_y=True)
        normalized_attention_weight = fluid.layers.softmax(attention_weight)
        # beta: seq2 soft-aligned to each position of seq1.
        beta = fluid.layers.matmul(normalized_attention_weight, seq2)
        attention_weight_t = fluid.layers.transpose(attention_weight,
                                                    perm=[0, 2, 1])
        normalized_attention_weight_t = fluid.layers.softmax(attention_weight_t)
        # alpha: seq1 soft-aligned to each position of seq2.
        alpha = fluid.layers.matmul(normalized_attention_weight_t, seq1)
        return alpha, beta

    def compare(self, seq, soft_alignment, param_prefix):
        """Compare each position of seq with its soft alignment."""
        concat_seq = fluid.layers.concat(input=[seq, soft_alignment], axis=2)
        return self.feed_forward_2d(concat_seq, param_prefix=param_prefix)

    def aggregate(self, vec1, vec2):
        """Sum the compare vectors over time and classify the pair."""
        vec1 = fluid.layers.reduce_sum(vec1, dim=1)
        vec2 = fluid.layers.reduce_sum(vec2, dim=1)
        concat_vec = fluid.layers.concat(input=[vec1, vec2], axis=1)
        return self.feed_forward(concat_vec, param_prefix='aggregate')
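

# ----------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original model).
# The _Config class, its field values, and the input shapes below are
# assumptions: the real config and data layers live in the training script.
# It assumes padded int64 token ids with a trailing dimension of 1 and
# float32 masks of a fixed length `seq_len`; exact shapes may differ
# depending on the Fluid version and the reader used.
# ----------------------------------------------------------------------------
if __name__ == '__main__':

    class _Config(object):
        """Hypothetical stand-in for the project's config object."""
        dict_dim = 10000                  # vocabulary size
        emb_dim = 300                     # word embedding size
        proj_emb_dim = 200                # projected embedding size, or None
        num_units = [200, 200]            # hidden sizes of the two-layer MLPs
        droprate = 0.1
        class_dim = 2
        share_wight_btw_seq = True
        word_embedding_trainable = True

    seq_len = 32                          # assumed fixed (padded) length
    seq1 = fluid.layers.data(name='seq1', shape=[seq_len, 1], dtype='int64')
    seq2 = fluid.layers.data(name='seq2', shape=[seq_len, 1], dtype='int64')
    mask1 = fluid.layers.data(name='mask1', shape=[seq_len], dtype='float32')
    mask2 = fluid.layers.data(name='mask2', shape=[seq_len], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    net = DecAttNet(_Config())
    avg_cost, acc, prediction = net(seq1, seq2, mask1, mask2, label)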