# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

import paddle.fluid as fluid
from paddle.fluid.dygraph import GRUUnit
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import Embedding, FC


class DynamicGRU(fluid.dygraph.Layer):
    """Unrolls a GRUUnit over the time dimension of a padded batch.

    Input is a [batch, seq_len, input_dim] tensor; output is
    [batch, seq_len, size]. Set `is_reverse=True` to process the
    sequence back to front.
    """

    def __init__(self,
                 scope_name,
                 size,
                 param_attr=None,
                 bias_attr=None,
                 is_reverse=False,
                 gate_activation='sigmoid',
                 candidate_activation='tanh',
                 h_0=None,
                 origin_mode=False,
                 init_size=None):
        super(DynamicGRU, self).__init__(scope_name)
        self.gru_unit = GRUUnit(
            self.full_name(),
            size * 3,
            param_attr=param_attr,
            bias_attr=bias_attr,
            activation=candidate_activation,
            gate_activation=gate_activation,
            origin_mode=origin_mode)
        self.size = size
        self.h_0 = h_0
        self.is_reverse = is_reverse

    def forward(self, inputs):
        hidden = self.h_0
        if hidden is None:
            # Default to a zero initial state when none is provided.
            hidden = fluid.layers.zeros(
                shape=[inputs.shape[0], self.size], dtype='float32')
        res = []
        for i in range(inputs.shape[1]):
            if self.is_reverse:
                i = inputs.shape[1] - 1 - i
            input_ = inputs[:, i:i + 1, :]
            input_ = fluid.layers.reshape(
                input_, [-1, input_.shape[2]], inplace=False)
            hidden, reset, gate = self.gru_unit(input_, hidden)
            hidden_ = fluid.layers.reshape(
                hidden, [-1, 1, hidden.shape[1]], inplace=False)
            res.append(hidden_)
        if self.is_reverse:
            res = res[::-1]
        res = fluid.layers.concat(res, axis=1)
        return res


class BiGRU(fluid.dygraph.Layer):
    """Bidirectional GRU layer: a forward and a reverse DynamicGRU, each
    fed by its own per-timestep FC projection; the two branch outputs
    are concatenated along the feature dimension.

    In dygraph mode, sublayers must be constructed here (not built
    on the fly with `input=` arguments as in the static-graph API).
    """

    def __init__(self, scope_name, grnn_hidden_dim, init_bound, h_0=None):
        super(BiGRU, self).__init__(scope_name)

        self.pre_gru = FC(
            self.full_name(),
            size=grnn_hidden_dim * 3,
            num_flatten_dims=2,  # apply the projection per time step
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-init_bound, high=init_bound),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)))

        self.gru = DynamicGRU(
            self.full_name(),
            size=grnn_hidden_dim,
            h_0=h_0,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-init_bound, high=init_bound),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)))
        self.pre_gru_r = FC(
            self.full_name(),
            size=grnn_hidden_dim * 3,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-init_bound, high=init_bound),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)))

        self.gru_r = DynamicGRU(
            self.full_name(),
            size=grnn_hidden_dim,
            is_reverse=True,
            h_0=h_0,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-init_bound, high=init_bound),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)))

    def forward(self, input_feature):
        fw = self.gru(self.pre_gru(input_feature))
        bw = self.gru_r(self.pre_gru_r(input_feature))
        # Both branches are [batch, seq_len, hidden]; merge on the feature
        # axis so downstream layers see grnn_hidden_dim * 2 features.
        return fluid.layers.concat(input=[fw, bw], axis=2)


class LAC(fluid.dygraph.Layer):
    """Lexical analysis model: word embedding -> stacked BiGRU ->
    per-label emission FC -> linear-chain CRF."""

    def __init__(self,
                 name_scope,
                 args,
                 vocab_size,
                 num_labels,
                 for_infer=True,
                 target=None):
        super(LAC, self).__init__(name_scope)

        self.word_emb_dim = args.word_emb_dim
        self.dict_dim = vocab_size
        self.grnn_hidden_dim = args.grnn_hidden_dim
        self.emb_lr = args.emb_learning_rate if 'emb_learning_rate' in dir(
            args) else 1.0
        self.crf_lr = args.crf_learning_rate if 'crf_learning_rate' in dir(
            args) else 1.0
        self.bigru_num = args.bigru_num
        self.init_bound = 0.1
        self.IS_SPARSE = True
        self.max_seq_lens = args.max_seq_lens

        self._word_embedding = Embedding(
            self.full_name(),
            size=[vocab_size, self.word_emb_dim],
            dtype='float32',
            is_sparse=self.IS_SPARSE,
            param_attr=fluid.ParamAttr(
                learning_rate=self.emb_lr,
                initializer=fluid.initializer.Uniform(
                    low=-self.init_bound, high=self.init_bound)))

        # Stack of bidirectional GRU layers, registered as sublayers so
        # their parameters are tracked by the dygraph framework.
        self._bigru_layers = []
        for i in range(self.bigru_num):
            self._bigru_layers.append(
                self.add_sublayer(
                    'bigru_%d' % i,
                    BiGRU(self.full_name(), self.grnn_hidden_dim,
                          self.init_bound)))

        self._emission_fc = FC(
            self.full_name(),
            size=num_labels,
            num_flatten_dims=2,  # per-timestep label scores
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-self.init_bound, high=self.init_bound),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)))

    def forward(self, inputs, targets=None, seq_lens=None):
        emb = self._word_embedding(inputs)

        # Zero out embeddings of padding tokens; the padding id equals
        # the vocabulary size (self.dict_dim).
        o_np_mask = (inputs.numpy() != self.dict_dim).astype('float32')
        mask_emb = fluid.layers.expand(
            to_variable(o_np_mask), [1, self.word_emb_dim])
        emb = emb * mask_emb
        emb = fluid.layers.reshape(
            emb, shape=[-1, self.max_seq_lens, self.word_emb_dim])

        input_feature = emb
        for bigru in self._bigru_layers:
            input_feature = bigru(input_feature)

        emission = self._emission_fc(input_feature)

        if targets is not None:
            crf_cost = fluid.layers.linear_chain_crf(
                input=emission,
                label=targets,
                param_attr=fluid.ParamAttr(
                    name='crfw', learning_rate=self.crf_lr),
                length=seq_lens)
            avg_cost = fluid.layers.mean(x=crf_cost)
            crf_decode = fluid.layers.crf_decoding(
                input=emission,
                param_attr=fluid.ParamAttr(name='crfw'),
                length=seq_lens)
            return avg_cost, crf_decode
        else:
            # For inference the CRF transition parameter 'crfw' must exist
            # before decoding; its shape is [num_labels + 2, num_labels].
            size = emission.shape[-1]
            fluid.layers.create_parameter(
                shape=[size + 2, size], dtype=emission.dtype, name='crfw')
            crf_decode = fluid.layers.crf_decoding(
                input=emission,
                param_attr=fluid.ParamAttr(name='crfw'),
                length=seq_lens)
            return crf_decode
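

# ---------------------------------------------------------------------------
# Minimal smoke-test sketch. Everything below is illustrative: the `args`
# namespace, the hyperparameter values, and the tensor shapes are assumptions
# for demonstration, not part of the original training pipeline. It exercises
# the inference branch (targets=None) under fluid's dygraph mode.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from argparse import Namespace

    # Hypothetical hyperparameters; real values come from the run config.
    args = Namespace(
        word_emb_dim=128,
        grnn_hidden_dim=128,
        bigru_num=2,
        max_seq_lens=8,
        emb_learning_rate=1.0,
        crf_learning_rate=1.0)

    batch_size, vocab_size, num_labels = 2, 100, 5

    with fluid.dygraph.guard():
        model = LAC('lac', args, vocab_size, num_labels)

        # Dygraph Embedding expects int64 ids of shape [N, 1]; sequences
        # are padded with id == vocab_size, which forward() masks out.
        words = np.random.randint(
            0, vocab_size,
            size=(batch_size * args.max_seq_lens, 1)).astype('int64')
        lens = np.full((batch_size, ), args.max_seq_lens, dtype='int64')

        decode = model(to_variable(words), None, to_variable(lens))
        print(decode.numpy().shape)  # expected: (batch_size, max_seq_lens)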