diff --git a/hapi/text/text.py b/hapi/text/text.py
index 1228d9fd0b7a652cf3103ceabaa9fa49ddd59abd..5ff5d1c35c4411e087efc43ecb300019465c066c 100644
--- a/hapi/text/text.py
+++ b/hapi/text/text.py
@@ -19,7 +19,6 @@ from __future__ import print_function
 import os
 import six
 import sys
-
 if six.PY2:
     reload(sys)
     sys.setdefaultencoding('utf8')
@@ -50,8 +49,8 @@ __all__ = [
     'BeamSearchDecoder', 'MultiHeadAttention', 'FFN', 'TransformerEncoderLayer',
     'TransformerEncoder', 'TransformerDecoderLayer', 'TransformerDecoder',
     'TransformerBeamSearchDecoder', 'Linear_chain_crf',
-    'Crf_decoding', 'SequenceTagging', 'GRUEncoderLayer', 'CNNEncoder',
-    'BOWEncoder', 'SimpleConvPoolLayer', 'GRUEncoder', 'DynamicGRU', 'LSTMEncoder'
+    'Crf_decoding', 'SequenceTagging', 'GRUEncoderLayer', 'SimCNNEncoder',
+    'SimBOWEncoder', 'SimpleConvPoolLayer', 'SimGRUEncoder', 'DynamicGRU', 'SimLSTMEncoder'
 ]
@@ -89,12 +88,12 @@ class RNNCell(Layer):
         batch_ref = flatten(batch_ref)[0]

         def _is_shape_sequence(seq):
-            if sys.version_info < (3,):
+            if sys.version_info < (3, ):
                 integer_types = (
                     int,
-                    long,)
+                    long, )
             else:
-                integer_types = (int,)
+                integer_types = (int, )
             """For shape, list/tuple of integer is the finest-grained objection"""
             if (isinstance(seq, list) or isinstance(seq, tuple)):
                 if reduce(
@@ -249,8 +248,8 @@ class BasicLSTMCell(RNNCell):
         self.use_customized_weight = False
         for _weights in [
-            forget_gate_weights, input_gate_weights, output_gate_weights,
-            cell_weights
+                forget_gate_weights, input_gate_weights, output_gate_weights,
+                cell_weights
         ]:
             for _key in _weights:
                 if _weights[_key] is not None:
@@ -275,7 +274,7 @@ class BasicLSTMCell(RNNCell):
                 is_bias=True)
         else:
             if "w" in forget_gate_weights and forget_gate_weights[
-                "w"] is not None:
+                    "w"] is not None:
                 self.fg_w = forget_gate_weights["w"]
             else:
                 if self._param_attr is not None and self._param_attr.name is not None:
@@ -289,7 +288,7 @@ class BasicLSTMCell(RNNCell):
                     dtype=self._dtype)

             if "h" in forget_gate_weights and forget_gate_weights[
-                "h"] is not None:
+                    "h"] is not None:
                 self.fg_h = forget_gate_weights["h"]
             else:
                 if self._param_attr is not None and self._param_attr.name is not None:
@@ -303,7 +302,7 @@ class BasicLSTMCell(RNNCell):
                     dtype=self._dtype)

             if "b" in forget_gate_weights and forget_gate_weights[
-                "b"] is not None:
+                    "b"] is not None:
                 self.fg_b = forget_gate_weights["b"]
             else:
                 if self._bias_attr is not None and self._bias_attr.name is not None:
@@ -318,7 +317,7 @@ class BasicLSTMCell(RNNCell):
                     is_bias=True)

             if "w" in input_gate_weights and input_gate_weights[
-                "w"] is not None:
+                    "w"] is not None:
                 self.ig_w = input_gate_weights["w"]
             else:
                 if self._param_attr is not None and self._param_attr.name is not None:
@@ -333,7 +332,7 @@ class BasicLSTMCell(RNNCell):
                     dtype=self._dtype)

             if "h" in input_gate_weights and input_gate_weights[
-                "h"] is not None:
+                    "h"] is not None:
                 self.ig_h = input_gate_weights["h"]
             else:
                 if self._param_attr is not None and self._param_attr.name is not None:
@@ -348,7 +347,7 @@ class BasicLSTMCell(RNNCell):
                     dtype=self._dtype)

             if "b" in input_gate_weights and input_gate_weights[
-                "b"] is not None:
+                    "b"] is not None:
                 self.ig_b = input_gate_weights["b"]
             else:
                 if self._bias_attr is not None and self._bias_attr.name is not None:
@@ -363,7 +362,7 @@ class BasicLSTMCell(RNNCell):
                     is_bias=True)

             if "w" in output_gate_weights and output_gate_weights[
-                "w"] is not None:
+                    "w"] is not None:
                 self.og_w = output_gate_weights["w"]
             else:
                 if self._param_attr is not None and self._param_attr.name is not None:
@@ -377,7 +376,7 @@ class BasicLSTMCell(RNNCell):
                     dtype=self._dtype)

             if "h" in output_gate_weights and output_gate_weights[
-                "h"] is not None:
+                    "h"] is not None:
                 self.og_h = output_gate_weights["h"]
             else:
                 if self._param_attr is not None and self._param_attr.name is not None:
@@ -392,7 +391,7 @@ class BasicLSTMCell(RNNCell):
                     dtype=self._dtype)

             if "b" in output_gate_weights and output_gate_weights[
-                "b"] is not None:
+                    "b"] is not None:
                 self.og_b = output_gate_weights["b"]
             else:
                 if self._bias_attr is not None and self._bias_attr.name is not None:
@@ -547,7 +546,7 @@ class BasicGRUCell(RNNCell):
         self.use_customized_weight = False
         for _weights in [
-            update_gate_weights, reset_gate_weights, cell_weights
+                update_gate_weights, reset_gate_weights, cell_weights
         ]:
             for _key in _weights:
                 if _weights[_key] is not None:
@@ -603,7 +602,7 @@ class BasicGRUCell(RNNCell):
             # create the parameters of gates in gru
             if "w" in update_gate_weights and update_gate_weights[
-                "w"] is not None:
+                    "w"] is not None:
                 self.ug_w = update_gate_weights["w"]
             else:
                 if gate_param_attr is not None and gate_param_attr.name is not None:
@@ -617,7 +616,7 @@ class BasicGRUCell(RNNCell):
                     dtype=self._dtype)

             if "h" in update_gate_weights and update_gate_weights[
-                "h"] is not None:
+                    "h"] is not None:
                 self.ug_h = update_gate_weights["h"]
             else:
                 if gate_param_attr is not None and gate_param_attr.name is not None:
@@ -631,7 +630,7 @@ class BasicGRUCell(RNNCell):
                     dtype=self._dtype)

             if "b" in update_gate_weights and update_gate_weights[
-                "b"] is not None:
+                    "b"] is not None:
                 self.ug_b = update_gate_weights["b"]
             else:
                 if gate_bias_attr is not None and gate_bias_attr.name is not None:
@@ -647,7 +646,7 @@ class BasicGRUCell(RNNCell):
             # reset gate parameters
             if "w" in reset_gate_weights and reset_gate_weights[
-                "w"] is not None:
+                    "w"] is not None:
                 self.rg_w = reset_gate_weights["w"]
             else:
                 if gate_param_attr is not None and gate_param_attr.name is not None:
@@ -661,7 +660,7 @@ class BasicGRUCell(RNNCell):
                     dtype=self._dtype)

             if "h" in reset_gate_weights and reset_gate_weights[
-                "h"] is not None:
+                    "h"] is not None:
                 self.rg_h = reset_gate_weights["h"]
             else:
                 if gate_param_attr is not None and gate_param_attr.name is not None:
@@ -675,7 +674,7 @@ class BasicGRUCell(RNNCell):
                     dtype=self._dtype)

             if "b" in reset_gate_weights and reset_gate_weights[
-                "b"] is not None:
+                    "b"] is not None:
                 self.rg_b = reused_params["b"]
             else:
                 if gate_bias_attr is not None and gate_bias_attr.name is not None:
@@ -803,7 +802,7 @@ class RNN(fluid.dygraph.Layer):
                 new_state = fluid.layers.elementwise_mul(
                     new_state, step_mask, axis=0) - fluid.layers.elementwise_mul(
-                    state, (step_mask - 1), axis=0)
+                        state, (step_mask - 1), axis=0)
                 return new_state

             flat_inputs = flatten(inputs)
@@ -849,8 +848,8 @@ class RNN(fluid.dygraph.Layer):
                 outputs = map_structure(
                     lambda x: ArrayWrapper(x),
                     step_outputs) if i == 0 else map_structure(
-                    lambda x, x_array: x_array.append(x), step_outputs,
-                    outputs)
+                        lambda x, x_array: x_array.append(x), step_outputs,
+                        outputs)

             final_outputs = map_structure(
                 lambda x: fluid.layers.stack(x.array,
@@ -919,7 +918,7 @@ class DynamicDecode(Layer):
                 step_mask.stop_gradient = True
                 new_state = layers.elementwise_mul(
                     state, step_mask, axis=0) - layers.elementwise_mul(
-                    new_state, (step_mask - 1), axis=0)
+                        new_state, (step_mask - 1), axis=0)
                 if convert_dtype(state_dtype) in ["bool"]:
                     new_state = layers.cast(new_state, dtype=state_dtype)
                 return new_state
@@ -961,8 +960,8 @@ class DynamicDecode(Layer):
                 outputs = map_structure(
                     lambda x: ArrayWrapper(x),
                     step_outputs) if step_idx == 0 else map_structure(
-                    lambda x, x_array: x_array.append(x), step_outputs,
-                    outputs)
+                        lambda x, x_array: x_array.append(x), step_outputs,
+                        outputs)
                 inputs, states, finished, sequence_lengths = (
                     next_inputs, next_states, next_finished,
                     next_sequence_lengths)
@@ -991,7 +990,7 @@ class DynamicDecode(Layer):
             return (final_outputs, final_states,
                     sequence_lengths) if self.return_length else (
-                final_outputs, final_states)
+                        final_outputs, final_states)
         else:
             return fluid.layers.dynamic_decode(
                 self.decoder,
@@ -1042,7 +1041,7 @@ class TransformerBeamSearchDecoder(layers.BeamSearchDecoder):
         x = layers.reshape(
             x, [0] * (len(x.shape) - var_dim_in_state
                       ) + [self.batch_size * self.beam_size] +
-                [int(size) for size in x.shape[-var_dim_in_state + 2:]])
+            [int(size) for size in x.shape[-var_dim_in_state + 2:]])
         x = layers.transpose(
             x,
             list(range((len(x.shape) + 1 - var_dim_in_state), len(x.shape))) +
@@ -1053,9 +1052,9 @@ class TransformerBeamSearchDecoder(layers.BeamSearchDecoder):
         var_dim_size = layers.shape(x)[self.var_dim_in_state]
         x = layers.reshape(
             x, [-1, self.beam_size] +
-                [int(size)
-                 for size in x.shape[1:self.var_dim_in_state]] + [var_dim_size] +
-                [int(size) for size in x.shape[self.var_dim_in_state + 1:]])
+            [int(size)
+             for size in x.shape[1:self.var_dim_in_state]] + [var_dim_size] +
+            [int(size) for size in x.shape[self.var_dim_in_state + 1:]])
         return x

     def step(self, time, inputs, states, **kwargs):
@@ -1118,7 +1117,7 @@ class PrePostProcessLayer(Layer):
             elif cmd == "d":  # add dropout
                 self.functors.append(lambda x: layers.dropout(
                     x, dropout_prob=dropout_rate, is_test=False)
-                    if dropout_rate else x)
+                                     if dropout_rate else x)

     def forward(self, x, residual=None):
         for i, cmd in enumerate(self.process_cmd):
@@ -1219,7 +1218,7 @@ class MultiHeadAttention(Layer):
         # scale dot product attention
         product = layers.matmul(
-            x=q, y=k, transpose_y=True, alpha=self.d_model ** -0.5)
+            x=q, y=k, transpose_y=True, alpha=self.d_model**-0.5)
         if attn_bias:
             product += attn_bias
         weights = layers.softmax(product)
@@ -1309,6 +1308,7 @@ class TransformerEncoderLayer(Layer):
                  reused_ffn_weights={"reused_fc1": None, "reused_fc2": None},
                  reused_post_ffn_layernorm=None):
+
         super(TransformerEncoderLayer, self).__init__()
         self.preprocesser1 = PrePostProcessLayer(preprocess_cmd, d_model,
@@ -1556,7 +1556,7 @@ class TransformerDecoder(Layer):
         ]


-# TODO: we should merge GRUCell with BasicGRUCell
+#TODO: we should merge GRUCell with BasicGRUCell
 class GRUCell(RNNCell):
     def __init__(self,
                  input_size,
@@ -1590,7 +1590,7 @@ class GRUCell(RNNCell):
         return [self.hidden_size]


-# TODO: we should merge GRUCell with BasicGRUCell
+#TODO: we should merge GRUCell with BasicGRUCell
 class GRUEncoderCell(RNNCell):
     def __init__(self,
                  num_layers,
@@ -1606,7 +1606,7 @@ class GRUEncoderCell(RNNCell):
             self.gru_cells.append(
                 self.add_sublayer(
                     "gru_%d" % i,
-                        #BasicGRUCell(
+                    #BasicGRUCell(
                     GRUCell(
                         input_size=input_size if i == 0 else hidden_size,
                         hidden_size=hidden_size,
@@ -1673,6 +1673,7 @@ class Linear_chain_crf(fluid.dygraph.Layer):
         self._transition = value

     def forward(self, input, label, length=None):
+
         alpha = self._helper.create_variable_for_type_inference(
             dtype=self._dtype)
         emission_exps = self._helper.create_variable_for_type_inference(
@@ -1723,6 +1724,7 @@ class Crf_decoding(fluid.dygraph.Layer):
         self._transition = value

     def forward(self, input, label=None, length=None):
+
         viterbi_path = self._helper.create_variable_for_type_inference(
             dtype=self._dtype)
         this_inputs = {
@@ -1919,7 +1921,7 @@ class SimpleConvPoolLayer(Layer):
         return x


-class CNNEncoder(Layer):
+class SimCNNEncoder(Layer):
     """
     simple CNNEncoder for simnet
     """
@@ -1933,7 +1935,7 @@ class CNNEncoder(Layer):
     def __init__(self,
                  dict_size,
                  emb_dim,
                  filter_size,
                  num_filters,
                  hidden_dim,
                  padding_idx,
                  act
                  ):
-        super(CNNEncoder, self).__init__()
+        super(SimCNNEncoder, self).__init__()
         self.dict_size = dict_size
         self.emb_dim = emb_dim
         self.filter_size = filter_size
@@ -1962,7 +1964,7 @@ class CNNEncoder(Layer):
         emb_out=self.cnn_layer(emb_reshape)
         return emb_out

-class BOWEncoder(Layer):
+class SimBOWEncoder(Layer):
     """
     simple BOWEncoder for simnet
     """
@@ -1973,7 +1975,7 @@ class BOWEncoder(Layer):
     def __init__(self,
                  dict_size,
                  bow_dim,
                  seq_len,
                  padding_idx
                  ):
-        super(BOWEncoder, self).__init__()
+        super(SimBOWEncoder, self).__init__()
         self.dict_size = dict_size
         self.bow_dim = bow_dim
         self.seq_len = seq_len
@@ -2034,7 +2036,7 @@ class DynamicGRU(fluid.dygraph.Layer):
         res = fluid.layers.concat(res, axis=1)
         return res

-class GRUEncoder(Layer):
+class SimGRUEncoder(Layer):
     """
     simple GRUEncoder for simnet
     """
@@ -2046,7 +2048,7 @@ class GRUEncoder(Layer):
     def __init__(self,
                  dict_size,
                  emb_dim,
                  gru_dim,
                  hidden_dim,
                  padding_idx,
                  seq_len
                  ):
-        super(GRUEncoder, self).__init__()
+        super(SimGRUEncoder, self).__init__()
         self.dict_size = dict_size
         self.emb_dim = emb_dim
         self.gru_dim = gru_dim
@@ -2071,7 +2073,7 @@ class GRUEncoder(Layer):
         gru = fluid.layers.tanh(gru)
         return gru

-class LSTMEncoder(Layer):
+class SimLSTMEncoder(Layer):
     """
     simple LSTMEncoder for simnet
     """
@@ -2087,7 +2089,7 @@ class LSTMEncoder(Layer):
     def __init__(
             self,
             dict_size,
             emb_dim,
             lstm_dim,
             hidden_dim,
             seq_len,
             padding_idx
             ):
         """
         initialize
         """
-        super(LSTMEncoder, self).__init__()
+        super(SimLSTMEncoder, self).__init__()
         self.dict_size = dict_size
         self.emb_dim = emb_dim
         self.lstm_dim = lstm_dim
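
Below is a short usage sketch of the renamed simnet encoders. It is an illustration added alongside the patch, not part of it: the module path is inferred from this file's location (hapi/text/text.py), the keyword argument names mirror the __init__ parameter lists shown in the hunks above, and the concrete sizes and the dygraph guard are assumptions rather than values taken from the source.

    # Hypothetical example exercising the renamed classes; only the class and
    # argument names come from the diff, all numeric values are illustrative.
    import paddle.fluid as fluid
    from hapi.text.text import SimBOWEncoder, SimGRUEncoder  # formerly BOWEncoder, GRUEncoder

    with fluid.dygraph.guard():
        bow_encoder = SimBOWEncoder(
            dict_size=10000, bow_dim=128, seq_len=32, padding_idx=0)
        gru_encoder = SimGRUEncoder(
            dict_size=10000, emb_dim=128, gru_dim=128, hidden_dim=128,
            padding_idx=0, seq_len=32)
        # Construction is all this sketch does; the encoders' forward signatures
        # are not shown in the diff, so no forward pass is attempted here.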