# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import math

import numpy as np
import paddle
import paddle.nn as nn
from paddlenlp.layers.crf import LinearChainCrf, ViterbiDecoder


class BiGruCrf(nn.Layer):
    """The network for lexical analysis, based on two layers of BiGRU and one
    layer of CRF. More details see https://arxiv.org/abs/1807.01882

    Args:
        word_emb_dim (int): The dimension in which a word is embedded.
        hidden_size (int): The number of hidden nodes in the GRU layer.
        vocab_size (int): The word vocab size.
        num_labels (int): The labels amount.
        emb_lr (float, optional): The scaling of the learning rate of the
            embedding layer. Defaults to 2.0.
        crf_lr (float, optional): The scaling of the learning rate of the
            crf layer. Defaults to 0.2.
        with_start_stop_tag (bool, optional): When True, the emission layer
            produces ``num_labels + 2`` scores per token instead of
            ``num_labels``; the flag is also forwarded unchanged to
            ``LinearChainCrf`` and ``ViterbiDecoder``. Defaults to True.
    """

    def __init__(self,
                 word_emb_dim,
                 hidden_size,
                 vocab_size,
                 num_labels,
                 emb_lr=2.0,
                 crf_lr=0.2,
                 with_start_stop_tag=True):
        super(BiGruCrf, self).__init__()
        self.word_emb_dim = word_emb_dim
        self.vocab_size = vocab_size
        self.num_labels = num_labels
        self.hidden_size = hidden_size
        self.emb_lr = emb_lr
        self.crf_lr = crf_lr
        # Every weight below is drawn from Uniform(-init_bound, init_bound).
        self.init_bound = 0.1

        self.word_embedding = nn.Embedding(
            num_embeddings=self.vocab_size,
            embedding_dim=self.word_emb_dim,
            weight_attr=paddle.ParamAttr(
                learning_rate=self.emb_lr,
                initializer=nn.initializer.Uniform(
                    low=-self.init_bound, high=self.init_bound)))

        self.gru = nn.GRU(
            input_size=self.word_emb_dim,
            hidden_size=self.hidden_size,
            num_layers=2,
            direction='bidirectional',
            weight_ih_attr=paddle.ParamAttr(
                initializer=nn.initializer.Uniform(
                    low=-self.init_bound, high=self.init_bound),
                regularizer=paddle.regularizer.L2Decay(coeff=1e-4)),
            weight_hh_attr=paddle.ParamAttr(
                initializer=nn.initializer.Uniform(
                    low=-self.init_bound, high=self.init_bound),
                regularizer=paddle.regularizer.L2Decay(coeff=1e-4)))

        # Two extra emission columns are reserved when start/stop tags are
        # enabled, matching what the CRF layer is configured with below.
        if with_start_stop_tag:
            emission_size = self.num_labels + 2
        else:
            emission_size = self.num_labels

        # The bidirectional GRU concatenates both directions, hence * 2.
        self.fc = nn.Linear(
            in_features=self.hidden_size * 2,
            out_features=emission_size,
            weight_attr=paddle.ParamAttr(
                initializer=nn.initializer.Uniform(
                    low=-self.init_bound, high=self.init_bound),
                regularizer=paddle.regularizer.L2Decay(coeff=1e-4)))

        self.crf = LinearChainCrf(self.num_labels, self.crf_lr,
                                  with_start_stop_tag)
        # The decoder reads the CRF's transition parameters directly.
        self.viterbi_decoder = ViterbiDecoder(self.crf.transitions,
                                              with_start_stop_tag)

    def forward(self, inputs, lengths):
        """Compute emission scores and the Viterbi-decoded tag sequence.

        Args:
            inputs (Tensor): Word ids fed to the embedding layer
                (assumed shape ``[batch_size, seq_len]`` -- TODO confirm
                against the data loader).
            lengths (Tensor): The number of valid (non-padding) tokens of each
                sequence in the batch.

        Returns:
            tuple: ``(emission, lengths, prediction)`` where ``emission`` is
            the output of the linear layer, ``lengths`` is returned unchanged
            and ``prediction`` is the second value returned by the Viterbi
            decoder.
        """
        word_embed = self.word_embedding(inputs)
        # Pass the real lengths so the GRU skips padded steps; otherwise the
        # backward direction would consume padding before the real tokens and
        # leak it into the hidden states of valid positions.
        bigru_output, _ = self.gru(word_embed, sequence_length=lengths)
        emission = self.fc(bigru_output)
        _, prediction = self.viterbi_decoder(emission, lengths)
        return emission, lengths, prediction