Commit 60917f41 authored by guosheng

Add test for text.py

Parent 61e218d0
@@ -18,10 +18,10 @@ import numpy as np
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
-from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer, to_variable
+from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer
 from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
 
 from hapi.model import Model, CrossEntropy, Loss
-from hapi.text import TransformerBeamSearchDecoder, DynamicDecode
+from hapi.text import TransformerCell, TransformerBeamSearchDecoder, DynamicDecode
 
 
 def position_encoding_init(n_position, d_pos_vec):
@@ -606,26 +606,6 @@ class Transformer(Model):
         return predict
 
-
-class TransfomerCell(object):
-    """
-    Let inputs=(trg_word, trg_pos), states=cache to make Transformer can be
-    used as RNNCell
-    """
-
-    def __init__(self, decoder):
-        self.decoder = decoder
-
-    def __call__(self, inputs, states, trg_src_attn_bias, enc_output,
-                 static_caches):
-        trg_word, trg_pos = inputs
-        for cache, static_cache in zip(states, static_caches):
-            cache.update(static_cache)
-        logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias,
-                              enc_output, states)
-        new_states = [{"k": cache["k"], "v": cache["v"]} for cache in states]
-        return logits, new_states
-
 
 class InferTransformer(Transformer):
     """
     model for prediction
@@ -657,7 +637,7 @@ class InferTransformer(Transformer):
         self.beam_size = args.pop("beam_size")
         self.max_out_len = args.pop("max_out_len")
         super(InferTransformer, self).__init__(**args)
-        cell = TransfomerCell(self.decoder)
+        cell = TransformerCell(self.decoder)
         self.beam_search_decoder = DynamicDecode(
             TransformerBeamSearchDecoder(
                 cell, bos_id, eos_id, beam_size, var_dim_in_state=2),
...
@@ -38,7 +38,7 @@ from hapi.loss import Loss
 from hapi.distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized
 from hapi.metrics import Metric
 from hapi.callbacks import config_callbacks
-from hapi.utils import to_list, to_numpy, flatten_list, restore_flatten_list
+from hapi.utils import to_list, to_numpy, flatten_list, restore_flatten_list, extract_args
 
 __all__ = [
     'Model',
@@ -495,14 +495,15 @@ class DynamicGraphAdapter(object):
         if labels is not None:
             labels = [to_variable(l) for l in to_list(labels)]
         if self._nranks > 1:
-            outputs = self.ddp_model.forward(*[to_variable(x) for x in inputs])
+            outputs = self.ddp_model.forward(
+                * [to_variable(x) for x in inputs])
             losses = self.model._loss_function(outputs, labels)
             final_loss = fluid.layers.sum(losses)
             final_loss = self.ddp_model.scale_loss(final_loss)
             final_loss.backward()
             self.ddp_model.apply_collective_grads()
         else:
-            outputs = self.model.forward(*[to_variable(x) for x in inputs])
+            outputs = self.model.forward(* [to_variable(x) for x in inputs])
             losses = self.model._loss_function(outputs, labels)
             final_loss = fluid.layers.sum(losses)
             final_loss.backward()
@@ -511,9 +512,9 @@ class DynamicGraphAdapter(object):
         self.model.clear_gradients()
         metrics = []
         for metric in self.model._metrics:
-            metric_outs = metric.add_metric_op(*(
-                to_list(outputs) + to_list(labels)))
-            m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])
+            metric_outs = metric.add_metric_op(*(to_list(outputs) + to_list(
+                labels)))
+            m = metric.update(* [to_numpy(m) for m in to_list(metric_outs)])
             metrics.append(m)
         return ([to_numpy(l) for l in losses], metrics) \
@@ -525,7 +526,7 @@ class DynamicGraphAdapter(object):
         inputs = to_list(inputs)
         if labels is not None:
             labels = [to_variable(l) for l in to_list(labels)]
-        outputs = self.model.forward(*[to_variable(x) for x in inputs])
+        outputs = self.model.forward(* [to_variable(x) for x in inputs])
         if self.model._loss_function:
             losses = self.model._loss_function(outputs, labels)
         else:
@@ -551,9 +552,9 @@ class DynamicGraphAdapter(object):
                     self._merge_count[self.mode + '_total'] += samples
                     self._merge_count[self.mode + '_batch'] = samples
-            metric_outs = metric.add_metric_op(*(
-                to_list(outputs) + to_list(labels)))
-            m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])
+            metric_outs = metric.add_metric_op(*(to_list(outputs) + to_list(
+                labels)))
+            m = metric.update(* [to_numpy(m) for m in to_list(metric_outs)])
             metrics.append(m)
         # To be consistent with static graph
...
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# When testing, add the hapi root path to PYTHONPATH, e.g.
# export PYTHONPATH=PATH_TO_HAPI:$PYTHONPATH
import unittest
import time
import random
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding, Linear, Layer
from paddle.fluid.layers import BeamSearchDecoder
import hapi.text as text
from hapi.model import Model, Input, set_device
from hapi.text import BasicLSTMCell, BasicGRUCell, RNN, DynamicDecode, MultiHeadAttention, TransformerEncoder
from hapi.text import *
def sigmoid(x):
return 1. / (1. + np.exp(-x))
def tanh(x):
return 2. * sigmoid(2. * x) - 1.
def lstm_step(step_in, pre_hidden, pre_cell, gate_w, gate_b, forget_bias=1.0):
concat_1 = np.concatenate([step_in, pre_hidden], 1)
gate_input = np.matmul(concat_1, gate_w)
gate_input += gate_b
i, j, f, o = np.split(gate_input, indices_or_sections=4, axis=1)
new_cell = pre_cell * sigmoid(f + forget_bias) + sigmoid(i) * tanh(j)
new_hidden = tanh(new_cell) * sigmoid(o)
return new_hidden, new_cell
def gru_step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b):
concat_1 = np.concatenate([step_in, pre_hidden], 1)
gate_input = np.matmul(concat_1, gate_w)
gate_input += gate_b
gate_input = sigmoid(gate_input)
r, u = np.split(gate_input, indices_or_sections=2, axis=1)
r_hidden = r * pre_hidden
candidate = np.matmul(np.concatenate([step_in, r_hidden], 1), candidate_w)
candidate += candidate_b
c = tanh(candidate)
new_hidden = u * pre_hidden + (1 - u) * c
return new_hidden
class ModuleApiTest(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls._np_rand_state = np.random.get_state()
cls._py_rand_state = random.getstate()
cls._random_seed = 123
np.random.seed(cls._random_seed)
random.seed(cls._random_seed)
cls.model_cls = type(cls.__name__ + "Model", (Model, ), {
"__init__": cls.model_init_wrapper(cls.model_init),
"forward": cls.model_forward
})
@classmethod
def tearDownClass(cls):
np.random.set_state(cls._np_rand_state)
random.setstate(cls._py_rand_state)
@staticmethod
def model_init_wrapper(func):
def __impl__(self, *args, **kwargs):
Model.__init__(self)
func(self, *args, **kwargs)
return __impl__
@staticmethod
def model_init(self, *args, **kwargs):
        raise NotImplementedError(
            "model_init acts as `Model.__init__`, thus subclasses must implement it")
@staticmethod
def model_forward(self, *args, **kwargs):
return self.module(*args, **kwargs)
def make_inputs(self):
# TODO(guosheng): add default from `self.inputs`
        raise NotImplementedError(
            "make_inputs creates the inputs for the model, thus subclasses must implement it")
def setUp(self):
"""
For the model which wraps the module to be tested:
Set input data by `self.inputs` list
Set init argument values by `self.attrs` list/dict
Set model parameter values by `self.param_states` dict
Set expected output data by `self.outputs` list
We can create a model instance and run once with these.
"""
self.inputs = []
self.attrs = {}
self.param_states = {}
self.outputs = []
def _calc_output(self, place, mode="test", dygraph=True):
if dygraph:
fluid.enable_dygraph(place)
else:
fluid.disable_dygraph()
fluid.default_main_program().random_seed = self._random_seed
fluid.default_startup_program().random_seed = self._random_seed
model = self.model_cls(**self.attrs) if isinstance(
self.attrs, dict) else self.model_cls(*self.attrs)
model.prepare(inputs=self.make_inputs(), device=place)
if self.param_states:
model.load(self.param_states, optim_state=None)
return model.test_batch(self.inputs)
def check_output_with_place(self, place, mode="test"):
dygraph_output = self._calc_output(place, mode, dygraph=True)
stgraph_output = self._calc_output(place, mode, dygraph=False)
expect_output = getattr(self, "outputs", None)
for actual_t, expect_t in zip(dygraph_output, stgraph_output):
self.assertTrue(np.allclose(actual_t, expect_t, rtol=1e-5, atol=0))
if expect_output:
for actual_t, expect_t in zip(dygraph_output, expect_output):
self.assertTrue(
np.allclose(
actual_t, expect_t, rtol=1e-5, atol=0))
def check_output(self):
devices = ["CPU", "GPU"] if fluid.is_compiled_with_cuda() else ["CPU"]
for device in devices:
place = set_device(device)
self.check_output_with_place(place)
class TestBasicLSTM(ModuleApiTest):
def setUp(self):
        # TODO(guosheng): Change to a bigger size. Currently a bigger hidden
        # size for LSTM would fail; the second static-graph run might produce
        # output that differs from the others.
shape = (2, 4, 16)
self.inputs = [np.random.random(shape).astype("float32")]
self.outputs = None
self.attrs = {"input_size": 16, "hidden_size": 16}
self.param_states = {}
@staticmethod
def model_init(self, input_size, hidden_size):
self.lstm = RNN(
BasicLSTMCell(
input_size,
hidden_size,
param_attr=fluid.ParamAttr(name="lstm_weight"),
bias_attr=fluid.ParamAttr(name="lstm_bias")))
@staticmethod
def model_forward(self, inputs):
return self.lstm(inputs)[0]
def make_inputs(self):
inputs = [
Input(
[None, None, self.inputs[-1].shape[-1]],
"float32",
name="input")
]
return inputs
def test_check_output(self):
self.check_output()
class TestBasicGRU(ModuleApiTest):
def setUp(self):
shape = (2, 4, 128)
self.inputs = [np.random.random(shape).astype("float32")]
self.outputs = None
self.attrs = {"input_size": 128, "hidden_size": 128}
self.param_states = {}
@staticmethod
def model_init(self, input_size, hidden_size):
self.gru = RNN(BasicGRUCell(input_size, hidden_size))
@staticmethod
def model_forward(self, inputs):
return self.gru(inputs)[0]
def make_inputs(self):
inputs = [
Input(
[None, None, self.inputs[-1].shape[-1]],
"float32",
name="input")
]
return inputs
def test_check_output(self):
self.check_output()
class TestBeamSearch(ModuleApiTest):
def setUp(self):
shape = (8, 32)
self.inputs = [
np.random.random(shape).astype("float32"),
np.random.random(shape).astype("float32")
]
self.outputs = None
self.attrs = {
"vocab_size": 100,
"embed_dim": 32,
"hidden_size": 32,
}
self.param_states = {}
@staticmethod
def model_init(self,
vocab_size,
embed_dim,
hidden_size,
bos_id=0,
eos_id=1,
beam_size=4,
max_step_num=20):
embedder = Embedding(size=[vocab_size, embed_dim])
output_layer = Linear(hidden_size, vocab_size)
cell = BasicLSTMCell(embed_dim, hidden_size)
decoder = BeamSearchDecoder(
cell,
start_token=bos_id,
end_token=eos_id,
beam_size=beam_size,
embedding_fn=embedder,
output_fn=output_layer)
self.beam_search_decoder = DynamicDecode(
decoder, max_step_num=max_step_num, is_test=True)
@staticmethod
def model_forward(self, init_hidden, init_cell):
return self.beam_search_decoder([init_hidden, init_cell])[0]
def make_inputs(self):
inputs = [
Input(
[None, self.inputs[0].shape[-1]],
"float32",
name="init_hidden"), Input(
[None, self.inputs[1].shape[-1]],
"float32",
name="init_cell")
]
return inputs
def test_check_output(self):
self.check_output()
class TestTransformerEncoder(ModuleApiTest):
def setUp(self):
self.inputs = [
# encoder input: [batch_size, seq_len, hidden_size]
np.random.random([2, 4, 512]).astype("float32"),
# self attention bias: [batch_size, n_head, seq_len, seq_len]
np.random.randint(0, 1, [2, 8, 4, 4]).astype("float32") * -1e9
]
self.outputs = None
self.attrs = {
"n_layer": 2,
"n_head": 8,
"d_key": 64,
"d_value": 64,
"d_model": 512,
"d_inner_hid": 1024
}
self.param_states = {}
@staticmethod
def model_init(self,
n_layer,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout=0.1,
attention_dropout=0.1,
relu_dropout=0.1,
preprocess_cmd="n",
postprocess_cmd="da",
ffn_fc1_act="relu"):
self.encoder = TransformerEncoder(
n_layer, n_head, d_key, d_value, d_model, d_inner_hid,
prepostprocess_dropout, attention_dropout, relu_dropout,
preprocess_cmd, postprocess_cmd, ffn_fc1_act)
@staticmethod
def model_forward(self, enc_input, attn_bias):
return self.encoder(enc_input, attn_bias)
def make_inputs(self):
inputs = [
Input(
[None, None, self.inputs[0].shape[-1]],
"float32",
name="enc_input"), Input(
[None, self.inputs[1].shape[1], None, None],
"float32",
name="attn_bias")
]
return inputs
def test_check_output(self):
self.check_output()
class TestTransformerDecoder(TestTransformerEncoder):
def setUp(self):
self.inputs = [
# decoder input: [batch_size, seq_len, hidden_size]
np.random.random([2, 4, 512]).astype("float32"),
# encoder output: [batch_size, seq_len, hidden_size]
np.random.random([2, 5, 512]).astype("float32"),
# self attention bias: [batch_size, n_head, seq_len, seq_len]
np.random.randint(0, 1, [2, 8, 4, 4]).astype("float32") * -1e9,
# cross attention bias: [batch_size, n_head, seq_len, seq_len]
np.random.randint(0, 1, [2, 8, 4, 5]).astype("float32") * -1e9
]
self.outputs = None
self.attrs = {
"n_layer": 2,
"n_head": 8,
"d_key": 64,
"d_value": 64,
"d_model": 512,
"d_inner_hid": 1024
}
self.param_states = {}
@staticmethod
def model_init(self,
n_layer,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout=0.1,
attention_dropout=0.1,
relu_dropout=0.1,
preprocess_cmd="n",
postprocess_cmd="da"):
self.decoder = TransformerDecoder(
n_layer, n_head, d_key, d_value, d_model, d_inner_hid,
prepostprocess_dropout, attention_dropout, relu_dropout,
preprocess_cmd, postprocess_cmd)
@staticmethod
def model_forward(self,
dec_input,
enc_output,
self_attn_bias,
cross_attn_bias,
caches=None):
return self.decoder(dec_input, enc_output, self_attn_bias,
cross_attn_bias, caches)
def make_inputs(self):
inputs = [
Input(
[None, None, self.inputs[0].shape[-1]],
"float32",
name="dec_input"), Input(
[None, None, self.inputs[0].shape[-1]],
"float32",
name="enc_output"), Input(
[None, self.inputs[-1].shape[1], None, None],
"float32",
name="self_attn_bias"), Input(
[None, self.inputs[-1].shape[1], None, None],
"float32",
name="cross_attn_bias")
]
return inputs
def test_check_output(self):
self.check_output()
class TestTransformerBeamSearchDecoder(ModuleApiTest):
def setUp(self):
shape = (8, 32)
self.inputs = [
np.random.random(shape).astype("float32"),
np.random.random(shape).astype("float32")
]
self.outputs = None
self.attrs = {
"vocab_size": 100,
"embed_dim": 32,
"hidden_size": 32,
}
self.param_states = {}
@staticmethod
def model_init(self,
vocab_size,
n_layer,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout=0.1,
attention_dropout=0.1,
relu_dropout=0.1,
preprocess_cmd="n",
postprocess_cmd="da",
bos_id=0,
eos_id=1,
beam_size=4,
max_step_num=20):
embedder = Embedding(size=[vocab_size, d_model])
output_layer = Linear(d_model, vocab_size)
decoder = TransformerDecoder(n_layer, n_head, d_key, d_value, d_model,
d_inner_hid, prepostprocess_dropout,
attention_dropout, relu_dropout,
preprocess_cmd, postprocess_cmd)
transformer_cell = TransformerCell(decoder)
self.beam_search_decoder = DynamicDecode(
TransformerBeamSearchDecoder(
transformer_cell,
bos_id,
eos_id,
beam_size,
var_dim_in_state=2),
max_step_num,
is_test=True)
@staticmethod
def model_forward(self, enc_output, trg_src_attn_bias):
caches = [{
"k": layers.fill_constant_batch_size_like(
input=enc_output,
shape=[-1, self.n_head, 0, self.d_key],
dtype=enc_output.dtype,
value=0),
"v": layers.fill_constant_batch_size_like(
input=enc_output,
shape=[-1, self.n_head, 0, self.d_value],
dtype=enc_output.dtype,
value=0),
} for i in range(self.n_layer)]
enc_output = TransformerBeamSearchDecoder.tile_beam_merge_with_batch(
enc_output, self.beam_size)
trg_src_attn_bias = TransformerBeamSearchDecoder.tile_beam_merge_with_batch(
trg_src_attn_bias, self.beam_size)
static_caches = self.decoder.decoder.prepare_static_cache(enc_output)
rs, _ = self.beam_search_decoder(
inits=caches,
enc_output=enc_output,
trg_src_attn_bias=trg_src_attn_bias,
static_caches=static_caches)
return rs
def make_inputs(self):
inputs = [
Input(
[None, self.inputs[0].shape[-1]],
"float32",
name="init_hidden"), Input(
[None, self.inputs[1].shape[-1]],
"float32",
name="init_cell")
]
return inputs
def test_check_output(self):
self.check_output()
if __name__ == '__main__':
unittest.main()
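
For context, the numpy helpers lstm_step and gru_step defined near the top of the new test file are plain reference implementations of single LSTM/GRU steps. Below is an editorial sketch (not part of the commit) of how such a reference could be driven over a whole sequence, which is presumably what outputs from RNN(BasicLSTMCell(...)) would be compared against; it assumes the lstm_step helper from the test file above is in scope, and the weight shapes simply follow its concatenated-gate layout.

# Illustration only; assumes `lstm_step` from the test file above is available.
import numpy as np

batch_size, seq_len, input_size, hidden_size = 2, 4, 16, 16
x = np.random.random((batch_size, seq_len, input_size)).astype("float32")
# Concatenated-gate layout expected by lstm_step: [input; hidden] -> 4 * hidden
gate_w = np.random.uniform(
    -0.1, 0.1, (input_size + hidden_size, 4 * hidden_size)).astype("float32")
gate_b = np.zeros((4 * hidden_size, ), dtype="float32")

hidden = np.zeros((batch_size, hidden_size), dtype="float32")
cell = np.zeros((batch_size, hidden_size), dtype="float32")
outputs = []
for t in range(seq_len):
    hidden, cell = lstm_step(x[:, t, :], hidden, cell, gate_w, gate_b)
    outputs.append(hidden)
ref_out = np.stack(outputs, axis=1)  # [batch_size, seq_len, hidden_size]
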
@@ -48,8 +48,8 @@ __all__ = [
     'RNNCell', 'BasicLSTMCell', 'BasicGRUCell', 'RNN', 'DynamicDecode',
     'BeamSearchDecoder', 'MultiHeadAttention', 'FFN',
     'TransformerEncoderLayer', 'TransformerEncoder', 'TransformerDecoderLayer',
-    'TransformerDecoder', 'TransformerBeamSearchDecoder', 'Linear_chain_crf',
-    'Crf_decoding', 'SequenceTagging', 'GRUEncoderLayer'
+    'TransformerDecoder', 'TransformerCell', 'TransformerBeamSearchDecoder',
+    'Linear_chain_crf', 'Crf_decoding', 'SequenceTagging', 'GRUEncoderLayer'
 ]
@@ -1002,7 +1002,7 @@ class DynamicDecode(Layer):
             **kwargs)
 
 
-class TransfomerCell(object):
+class TransformerCell(Layer):
     """
     Let inputs=(trg_word, trg_pos), states=cache to make Transformer can be
     used as RNNCell
     """
...
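
For reference (an editorial sketch, not part of the diff): the helper removed from the model file above now lives in hapi.text as TransformerCell and subclasses Layer. The snippet below condenses how it is wired into beam-search decoding, following the removed code and the new TestTransformerBeamSearchDecoder test; all sizes are placeholders, and arguments are passed positionally as in the test because the exact keyword names are not shown here.

# Condensed sketch; sizes are placeholders taken from the test above.
from hapi.text import (TransformerDecoder, TransformerCell,
                       TransformerBeamSearchDecoder, DynamicDecode)

# Positional args as in the test: n_layer, n_head, d_key, d_value, d_model,
# d_inner_hid, prepostprocess_dropout, attention_dropout, relu_dropout,
# preprocess_cmd, postprocess_cmd
decoder = TransformerDecoder(2, 8, 64, 64, 512, 1024, 0.1, 0.1, 0.1, "n", "da")

# TransformerCell adapts the decoder to an RNNCell-style interface
# (inputs=(trg_word, trg_pos), states=cache) for the beam search decoder.
cell = TransformerCell(decoder)
beam_search_decoder = DynamicDecode(
    TransformerBeamSearchDecoder(
        cell, 0, 1, 4, var_dim_in_state=2),  # cell, bos_id, eos_id, beam_size
    max_step_num=20,
    is_test=True)
# At decode time the caller tiles enc_output / trg_src_attn_bias to beam size
# and passes per-layer k/v caches as `inits`, as model_forward in the test shows.
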