Commit eb20b652 authored by G guosheng

Add more unit tests for APIs in text.py.

Rename some APIs in text.py.
Parent 60917f41
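For reference, the renames introduced here change how downstream code imports these classes. A minimal before/after sketch based on the renames visible in this diff:

```python
# Before this commit (old names):
# from hapi.text.text import GRUEncoderLayer, Linear_chain_crf, Crf_decoding

# After this commit (new names):
from hapi.text.text import GRUEncoder, LinearChainCRF, CRFDecoding
```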
......@@ -16,7 +16,7 @@ from paddle.fluid.dygraph.nn import Linear, Embedding
from paddle.fluid.dygraph.base import to_variable
import numpy as np
from hapi.model import Model
from hapi.text.text import GRUEncoderLayer as BiGRUEncoder
from hapi.text.text import GRUEncoder as BiGRUEncoder
from hapi.text.test import BOWEncoder, CNNEncoder, GRUEncoder
......@@ -36,14 +36,18 @@ class CNN(Model):
dict_size=self.dict_dim + 1,
emb_dim=self.emb_dim,
seq_len=self.seq_len,
filter_size= self.win_size,
num_filters= self.hid_dim,
hidden_dim= self.hid_dim,
filter_size=self.win_size,
num_filters=self.hid_dim,
hidden_dim=self.hid_dim,
padding_idx=None,
act='tanh')
self._fc1 = Linear(input_dim = self.hid_dim*self.seq_len, output_dim=self.fc_hid_dim, act="softmax")
self._fc_prediction = Linear(input_dim = self.fc_hid_dim,
output_dim = self.class_dim,
self._fc1 = Linear(
input_dim=self.hid_dim * self.seq_len,
output_dim=self.fc_hid_dim,
act="softmax")
self._fc_prediction = Linear(
input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")
def forward(self, inputs):
......@@ -69,10 +73,13 @@ class BOW(Model):
padding_idx=None,
bow_dim=self.hid_dim,
seq_len=self.seq_len)
self._fc1 = Linear(input_dim = self.hid_dim, output_dim=self.hid_dim, act="tanh")
self._fc2 = Linear(input_dim = self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(input_dim = self.fc_hid_dim,
output_dim = self.class_dim,
self._fc1 = Linear(
input_dim=self.hid_dim, output_dim=self.hid_dim, act="tanh")
self._fc2 = Linear(
input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(
input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")
def forward(self, inputs):
......@@ -94,8 +101,10 @@ class GRU(Model):
self.class_dim = 2
self.batch_size = batch_size
self.seq_len = seq_len
self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(input_dim=self.fc_hid_dim,
self._fc1 = Linear(
input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(
input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")
self._encoder = GRUEncoder(
......@@ -130,9 +139,11 @@ class BiGRU(Model):
is_sparse=False)
h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
h_0 = to_variable(h_0)
self._fc1 = Linear(input_dim = self.hid_dim, output_dim=self.hid_dim*3)
self._fc2 = Linear(input_dim = self.hid_dim*2, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(input_dim=self.fc_hid_dim,
self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3)
self._fc2 = Linear(
input_dim=self.hid_dim * 2, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(
input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")
self._encoder = BiGRUEncoder(
......@@ -144,7 +155,8 @@ class BiGRU(Model):
def forward(self, inputs):
emb = self.embedding(inputs)
emb = fluid.layers.reshape(emb, shape=[self.batch_size, -1, self.hid_dim])
emb = fluid.layers.reshape(
emb, shape=[self.batch_size, -1, self.hid_dim])
fc_1 = self._fc1(emb)
encoded_vector = self._encoder(fc_1)
encoded_vector = fluid.layers.tanh(encoded_vector)
......
......@@ -21,7 +21,7 @@ import paddle.fluid.layers as layers
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer
from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
from hapi.model import Model, CrossEntropy, Loss
from hapi.text import TransformerCell, TransformerBeamSearchDecoder, DynamicDecode
from hapi.text import TransformerBeamSearchDecoder, DynamicDecode
def position_encoding_init(n_position, d_pos_vec):
......@@ -606,6 +606,27 @@ class Transformer(Model):
return predict
class TransformerCell(Layer):
"""
Let inputs=(trg_word, trg_pos) and states=cache so that the Transformer
decoder can be used as an RNNCell.
"""
def __init__(self, decoder):
super(TransformerCell, self).__init__()
self.decoder = decoder
def forward(self, inputs, states, trg_src_attn_bias, enc_output,
static_caches):
trg_word, trg_pos = inputs
for cache, static_cache in zip(states, static_caches):
cache.update(static_cache)
logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias,
enc_output, states)
new_states = [{"k": cache["k"], "v": cache["v"]} for cache in states]
return logits, new_states
class InferTransformer(Transformer):
"""
model for prediction
......
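TransformerCell (now also defined here in transformer.py) wraps the Transformer decoder so that one call performs a single decoding step, matching the RNNCell contract expected by the beam-search decoder. A minimal sketch of that step, assuming `decoder`, the per-layer `states`/`static_caches`, `trg_src_attn_bias`, `enc_output`, and the current `trg_word`/`trg_pos` tensors are already built as in InferTransformer:

```python
# Hedged sketch of one decoding step; all tensors here are assumed inputs.
cell = TransformerCell(decoder)
logits, states = cell(
    (trg_word, trg_pos),              # current target token ids and positions
    states,                           # per-layer {"k": ..., "v": ...} caches
    trg_src_attn_bias=trg_src_attn_bias,
    enc_output=enc_output,
    static_caches=static_caches)      # precomputed encoder-side K/V per layer
```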
......@@ -25,8 +25,8 @@ from paddle.fluid.dygraph import Embedding, Linear, Layer
from paddle.fluid.layers import BeamSearchDecoder
import hapi.text as text
from hapi.model import Model, Input, set_device
from hapi.text import BasicLSTMCell, BasicGRUCell, RNN, DynamicDecode, MultiHeadAttention, TransformerEncoder
from hapi.text import *
# from hapi.text.text import BasicLSTMCell, BasicGRUCell, RNN, DynamicDecode, MultiHeadAttention, TransformerEncoder, TransformerCell
from hapi.text.text import *
def sigmoid(x):
......@@ -187,7 +187,7 @@ class TestBasicLSTM(ModuleApiTest):
Input(
[None, None, self.inputs[-1].shape[-1]],
"float32",
name="input")
name="input"),
]
return inputs
......@@ -216,7 +216,7 @@ class TestBasicGRU(ModuleApiTest):
Input(
[None, None, self.inputs[-1].shape[-1]],
"float32",
name="input")
name="input"),
]
return inputs
......@@ -270,10 +270,9 @@ class TestBeamSearch(ModuleApiTest):
Input(
[None, self.inputs[0].shape[-1]],
"float32",
name="init_hidden"), Input(
[None, self.inputs[1].shape[-1]],
"float32",
name="init_cell")
name="init_hidden"),
Input(
[None, self.inputs[1].shape[-1]], "float32", name="init_cell"),
]
return inputs
......@@ -328,10 +327,11 @@ class TestTransformerEncoder(ModuleApiTest):
Input(
[None, None, self.inputs[0].shape[-1]],
"float32",
name="enc_input"), Input(
name="enc_input"),
Input(
[None, self.inputs[1].shape[1], None, None],
"float32",
name="attn_bias")
name="attn_bias"),
]
return inputs
......@@ -395,16 +395,19 @@ class TestTransformerDecoder(TestTransformerEncoder):
Input(
[None, None, self.inputs[0].shape[-1]],
"float32",
name="dec_input"), Input(
name="dec_input"),
Input(
[None, None, self.inputs[0].shape[-1]],
"float32",
name="enc_output"), Input(
name="enc_output"),
Input(
[None, self.inputs[-1].shape[1], None, None],
"float32",
name="self_attn_bias"), Input(
name="self_attn_bias"),
Input(
[None, self.inputs[-1].shape[1], None, None],
"float32",
name="cross_attn_bias")
name="cross_attn_bias"),
]
return inputs
......@@ -414,16 +417,21 @@ class TestTransformerDecoder(TestTransformerEncoder):
class TestTransformerBeamSearchDecoder(ModuleApiTest):
def setUp(self):
shape = (8, 32)
self.inputs = [
np.random.random(shape).astype("float32"),
np.random.random(shape).astype("float32")
# encoder output: [batch_size, seq_len, hidden_size]
np.random.random([2, 5, 128]).astype("float32"),
# cross attention bias: [batch_size, n_head, seq_len, seq_len]
np.random.randint(0, 1, [2, 2, 1, 5]).astype("float32") * -1e9
]
self.outputs = None
self.attrs = {
"vocab_size": 100,
"embed_dim": 32,
"hidden_size": 32,
"n_layer": 2,
"n_head": 2,
"d_key": 64,
"d_value": 64,
"d_model": 128,
"d_inner_hid": 128
}
self.param_states = {}
......@@ -445,13 +453,24 @@ class TestTransformerBeamSearchDecoder(ModuleApiTest):
eos_id=1,
beam_size=4,
max_step_num=20):
embedder = Embedding(size=[vocab_size, d_model])
self.beam_size = beam_size
def embeder_init(self, size):
Layer.__init__(self)
self.embedder = Embedding(size)
Embedder = type("Embedder", (Layer, ), {
"__init__": embeder_init,
"forward": lambda self, word, pos: self.embedder(word)
})
embedder = Embedder(size=[vocab_size, d_model])
output_layer = Linear(d_model, vocab_size)
decoder = TransformerDecoder(n_layer, n_head, d_key, d_value, d_model,
d_inner_hid, prepostprocess_dropout,
attention_dropout, relu_dropout,
self.decoder = TransformerDecoder(
n_layer, n_head, d_key, d_value, d_model, d_inner_hid,
prepostprocess_dropout, attention_dropout, relu_dropout,
preprocess_cmd, postprocess_cmd)
transformer_cell = TransformerCell(decoder)
transformer_cell = TransformerCell(self.decoder, embedder,
output_layer)
self.beam_search_decoder = DynamicDecode(
TransformerBeamSearchDecoder(
transformer_cell,
......@@ -464,23 +483,12 @@ class TestTransformerBeamSearchDecoder(ModuleApiTest):
@staticmethod
def model_forward(self, enc_output, trg_src_attn_bias):
caches = [{
"k": layers.fill_constant_batch_size_like(
input=enc_output,
shape=[-1, self.n_head, 0, self.d_key],
dtype=enc_output.dtype,
value=0),
"v": layers.fill_constant_batch_size_like(
input=enc_output,
shape=[-1, self.n_head, 0, self.d_value],
dtype=enc_output.dtype,
value=0),
} for i in range(self.n_layer)]
caches = self.decoder.prepare_incremental_cache(enc_output)
enc_output = TransformerBeamSearchDecoder.tile_beam_merge_with_batch(
enc_output, self.beam_size)
trg_src_attn_bias = TransformerBeamSearchDecoder.tile_beam_merge_with_batch(
trg_src_attn_bias, self.beam_size)
static_caches = self.decoder.decoder.prepare_static_cache(enc_output)
static_caches = self.decoder.prepare_static_cache(enc_output)
rs, _ = self.beam_search_decoder(
inits=caches,
enc_output=enc_output,
......@@ -491,12 +499,42 @@ class TestTransformerBeamSearchDecoder(ModuleApiTest):
def make_inputs(self):
inputs = [
Input(
[None, self.inputs[0].shape[-1]],
[None, None, self.inputs[0].shape[-1]],
"float32",
name="enc_output"),
Input(
[None, self.inputs[1].shape[1], None, None],
"float32",
name="init_hidden"), Input(
[None, self.inputs[1].shape[-1]],
name="trg_src_attn_bias"),
]
return inputs
def test_check_output(self):
self.check_output()
class TestSequenceTagging(ModuleApiTest):
def setUp(self):
shape = (2, 4, 128)
self.inputs = [np.random.random(shape).astype("float32")]
self.outputs = None
self.attrs = {"input_size": 128, "hidden_size": 128}
self.param_states = {}
@staticmethod
def model_init(self, input_size, hidden_size):
self.module = SequenceTagging(input_size, hidden_size)
@staticmethod
def model_forward(self, inputs):
return self.module(inputs)[0]
def make_inputs(self):
inputs = [
Input(
[None, None, self.inputs[-1].shape[-1]],
"float32",
name="init_cell")
name="input"),
]
return inputs
......
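For reference, the new TestTransformerBeamSearchDecoder inputs line up with the Input declarations above as follows; a hedged sketch mirroring the arrays and specs shown in this diff:

```python
import numpy as np
from hapi.model import Input

# Dummy data mirroring the updated test: encoder output and cross-attention bias.
enc_output = np.random.random([2, 5, 128]).astype("float32")
trg_src_attn_bias = np.random.randint(
    0, 1, [2, 2, 1, 5]).astype("float32") * -1e9

# Matching static-graph input specs; None marks dimensions left dynamic.
inputs = [
    Input([None, None, enc_output.shape[-1]], "float32", name="enc_output"),
    Input([None, trg_src_attn_bias.shape[1], None, None], "float32",
          name="trg_src_attn_bias"),
]
```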
......@@ -28,6 +28,6 @@ from hapi.text.text import TransformerBeamSearchDecoder as TransformerBeamSearch
from hapi.text.text import GRUCell as GRUCell
from hapi.text.text import GRUEncoderCell as GRUEncoderCell
from hapi.text.text import BiGRU as BiGRU
from hapi.text.text import Linear_chain_crf as Linear_chain_crf
from hapi.text.text import Crf_decoding as Crf_decoding
from hapi.text.text import LinearChainCRF as LinearChainCRF
from hapi.text.text import CRFDecoding as CRFDecoding
from hapi.text.text import SequenceTagging as SequenceTagging
......@@ -49,7 +49,7 @@ __all__ = [
'BeamSearchDecoder', 'MultiHeadAttention', 'FFN',
'TransformerEncoderLayer', 'TransformerEncoder', 'TransformerDecoderLayer',
'TransformerDecoder', 'TransformerCell', 'TransformerBeamSearchDecoder',
'Linear_chain_crf', 'Crf_decoding', 'SequenceTagging', 'GRUEncoderLayer'
'LinearChainCRF', 'CRFDecoding', 'SequenceTagging', 'GRUEncoder'
]
......@@ -1008,18 +1008,38 @@ class TransformerCell(Layer):
used as RNNCell
"""
def __init__(self, decoder):
def __init__(self, decoder, embedding_fn=None, output_fn=None):
super(TransformerCell, self).__init__()
self.decoder = decoder
self.embedding_fn = embedding_fn
self.output_fn = output_fn
def __call__(self, inputs, states, trg_src_attn_bias, enc_output,
def forward(self, inputs, states, trg_src_attn_bias, enc_output,
static_caches):
trg_word, trg_pos = inputs
for cache, static_cache in zip(states, static_caches):
cache.update(static_cache)
logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias,
enc_output, states)
if self.embedding_fn is not None:
dec_input = self.embedding_fn(trg_word, trg_pos)
outputs = self.decoder(dec_input, enc_output, None,
trg_src_attn_bias, states)
else:
outputs = self.decoder(trg_word, trg_pos, enc_output, None,
trg_src_attn_bias, states)
if self.output_fn is not None:
outputs = self.output_fn(outputs)
if len(outputs.shape) == 3:
# squeeze to adapt to BeamSearchDecoder, which uses 2D logits
outputs = layers.squeeze(outputs, [1])
new_states = [{"k": cache["k"], "v": cache["v"]} for cache in states]
return logits, new_states
return outputs, new_states
@property
def state_shape(self):
return [{
"k": [self.n_head, 0, self.d_key],
"v": [self.n_head, 0, self.d_value],
} for i in range(len(self.n_layer))]
class TransformerBeamSearchDecoder(layers.BeamSearchDecoder):
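The reworked TransformerCell above can now optionally embed the raw target ids before calling the decoder and project the decoder output to vocabulary logits afterwards, squeezing 3D outputs down to the 2D logits that BeamSearchDecoder expects. A hedged construction sketch; `word_emb`, `pos_emb`, and `project` are illustrative placeholders, not part of this diff:

```python
# Illustrative only: word_emb/pos_emb are assumed Embedding layers and
# project an assumed Linear(d_model, vocab_size) output layer.
cell = TransformerCell(
    decoder,
    embedding_fn=lambda word, pos: word_emb(word) + pos_emb(pos),
    output_fn=project)
```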
......@@ -1521,6 +1541,11 @@ class TransformerDecoder(Layer):
preprocess_cmd, postprocess_cmd):
super(TransformerDecoder, self).__init__()
self.n_layer = n_layer
self.n_head = n_head
self.d_key = d_key
self.d_value = d_value
self.decoder_layers = list()
for i in range(n_layer):
self.decoder_layers.append(
......@@ -1555,6 +1580,20 @@ class TransformerDecoder(Layer):
for decoder_layer in self.decoder_layers
]
def prepare_incremental_cache(self, enc_output):
return [{
"k": layers.fill_constant_batch_size_like(
input=enc_output,
shape=[-1, self.n_head, 0, self.d_key],
dtype=enc_output.dtype,
value=0),
"v": layers.fill_constant_batch_size_like(
input=enc_output,
shape=[-1, self.n_head, 0, self.d_value],
dtype=enc_output.dtype,
value=0),
} for i in range(self.n_layer)]
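The new prepare_incremental_cache helper centralizes the empty per-layer K/V cache construction that the test previously built inline; together with prepare_static_cache and beam tiling, the decoding setup condenses to the following (taken from the updated test's model_forward above):

```python
# Condensed sketch of the beam-search decoding setup; `decoder`, `enc_output`,
# `trg_src_attn_bias`, and `beam_size` are assumed to exist as in the test.
caches = decoder.prepare_incremental_cache(enc_output)
enc_output = TransformerBeamSearchDecoder.tile_beam_merge_with_batch(
    enc_output, beam_size)
trg_src_attn_bias = TransformerBeamSearchDecoder.tile_beam_merge_with_batch(
    trg_src_attn_bias, beam_size)
static_caches = decoder.prepare_static_cache(enc_output)
```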
#TODO: we should merge GRUCell with BasicGRUCell
class GRUCell(RNNCell):
......@@ -1651,9 +1690,9 @@ class BiGRU(fluid.dygraph.Layer):
return bi_merge
class Linear_chain_crf(fluid.dygraph.Layer):
class LinearChainCRF(Layer):
def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
super(Linear_chain_crf, self).__init__()
super(LinearChainCRF, self).__init__()
self._param_attr = param_attr
self._dtype = dtype
......@@ -1702,9 +1741,9 @@ class Linear_chain_crf(fluid.dygraph.Layer):
return log_likelihood
class Crf_decoding(fluid.dygraph.Layer):
class CRFDecoding(Layer):
def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
super(Crf_decoding, self).__init__()
super(CRFDecoding, self).__init__()
self._dtype = dtype
self._size = size
......@@ -1742,7 +1781,7 @@ class Crf_decoding(fluid.dygraph.Layer):
return viterbi_path
class GRUEncoderLayer(Layer):
class GRUEncoder(Layer):
def __init__(self,
input_dim,
grnn_hidden_dim,
......@@ -1750,7 +1789,7 @@ class GRUEncoderLayer(Layer):
num_layers=1,
h_0=None,
is_bidirection=False):
super(GRUEncoderLayer, self).__init__()
super(GRUEncoder, self).__init__()
self.h_0 = h_0
self.num_layers = num_layers
self.is_bidirection = is_bidirection
......@@ -1849,7 +1888,7 @@ class SequenceTagging(fluid.dygraph.Layer):
force_cpu=True,
name='h_0')
self.gru_encoder = GRUEncoderLayer(
self.gru_encoder = GRUEncoder(
input_dim=self.grnn_hidden_dim,
grnn_hidden_dim=self.grnn_hidden_dim,
init_bound=self.init_bound,
......@@ -1866,12 +1905,12 @@ class SequenceTagging(fluid.dygraph.Layer):
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.linear_chain_crf = Linear_chain_crf(
self.linear_chain_crf = LinearChainCRF(
param_attr=fluid.ParamAttr(
name='linear_chain_crfw', learning_rate=self.crf_lr),
size=self.num_labels)
self.crf_decoding = Crf_decoding(
self.crf_decoding = CRFDecoding(
param_attr=fluid.ParamAttr(
name='crfw', learning_rate=self.crf_lr),
size=self.num_labels)
......
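Putting the renames together, SequenceTagging now wires the tagging pipeline through GRUEncoder, LinearChainCRF, and CRFDecoding. A condensed sketch based on the constructor calls shown in this diff; the hyperparameter values are illustrative placeholders:

```python
import paddle.fluid as fluid
from hapi.text.text import GRUEncoder, LinearChainCRF, CRFDecoding

# Illustrative hyperparameters, not taken from the diff.
grnn_hidden_dim, num_labels, crf_lr, init_bound = 128, 57, 0.2, 0.1

gru_encoder = GRUEncoder(
    input_dim=grnn_hidden_dim,
    grnn_hidden_dim=grnn_hidden_dim,
    init_bound=init_bound)
linear_chain_crf = LinearChainCRF(
    param_attr=fluid.ParamAttr(
        name='linear_chain_crfw', learning_rate=crf_lr),
    size=num_labels)
crf_decoding = CRFDecoding(
    param_attr=fluid.ParamAttr(name='crfw', learning_rate=crf_lr),
    size=num_labels)
```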