Unverified commit ed262b80, authored by Aurelius84, committed by GitHub

[Dy2stat] Add Sentiment and LAC model for unittest (#25071)

* add sentiment unittest

* add LAC model

* add test=develop

* rename tmp test=develop

* fix timeout test=develop

* undo tmp_var test=develop
Parent bc2bd3c1
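Both test files added by this commit are self-contained unittest suites ending in `unittest.main()`. The first file must be saved as test_lac.py, since the second file imports `DynamicGRU` from it; the second file's name is not shown in this diff (test_sentiment.py is a reasonable guess given the PR title). Each can then be run directly:

    python test_lac.py
    python test_sentiment.py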
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import math
import time
import numpy as np
import unittest
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph import Embedding, Linear, GRUUnit
from paddle.fluid.dygraph import declarative, ProgramTranslator
SEED = 2020
program_translator = ProgramTranslator()
class DynamicGRU(fluid.dygraph.Layer):
def __init__(self,
size,
h_0=None,
param_attr=None,
bias_attr=None,
is_reverse=False,
gate_activation='sigmoid',
candidate_activation='tanh',
origin_mode=False,
init_size=None):
super(DynamicGRU, self).__init__()
self.gru_unit = GRUUnit(
size * 3,
param_attr=param_attr,
bias_attr=bias_attr,
activation=candidate_activation,
gate_activation=gate_activation,
origin_mode=origin_mode)
self.size = size
self.h_0 = h_0
self.is_reverse = is_reverse
def forward(self, inputs):
        # Use `to_variable` to create a copy of the global h_0, which is created
        # outside `DynamicGRU`, to avoid modifying it, because the same `h_0` is
        # shared by other `DynamicGRU` instances.
hidden = to_variable(self.h_0)
hidden.stop_gradient = True
res = []
for i in range(inputs.shape[1]):
if self.is_reverse:
j = fluid.layers.shape(inputs)[1] - 1 - i
else:
                # TODO(Aurelius84): In a while block, if a var created in the
                # parent block participates in the gradient calculation, the
                # gradient is incorrect because each step scope always returns
                # the same value generated by the last step. Here we add 0 so
                # that `j` is created inside the while block, avoiding this
                # bug; a proper fix is planned for the next PR.
j = i + 0
# FIXME(Aurelius84): see above explanation.
hidden = fluid.layers.scale(hidden, 1)
# See above explanation.
# input_ = inputs[:, i:i+1, :] # original code
input_ = fluid.layers.slice(
inputs, axes=[1], starts=[j], ends=[j + 1])
input_ = fluid.layers.reshape(
input_, [-1, input_.shape[2]], inplace=False)
hidden, reset, gate = self.gru_unit(input_, hidden)
hidden_ = fluid.layers.reshape(
hidden, [-1, 1, hidden.shape[1]], inplace=False)
res.append(hidden_)
if self.is_reverse:
res = res[::-1]
res = fluid.layers.concat(res, axis=1)
return res
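# A shape sketch for DynamicGRU, inferred from the code above (not part of the
# original file): with hidden size H = `size`, the expected input is
# [batch, seq_len, H * 3] (the three gate projections are pre-computed by a
# Linear layer, see `BiGRU.pre_gru` below), `h_0` is [batch, H], and the
# output is [batch, seq_len, H] after concatenating the per-step hiddens.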
class BiGRU(fluid.dygraph.Layer):
def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None):
super(BiGRU, self).__init__()
self.pre_gru = Linear(
input_dim=input_dim,
output_dim=grnn_hidden_dim * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.gru = DynamicGRU(
size=grnn_hidden_dim,
h_0=h_0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.pre_gru_r = Linear(
input_dim=input_dim,
output_dim=grnn_hidden_dim * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.gru_r = DynamicGRU(
size=grnn_hidden_dim,
is_reverse=True,
h_0=h_0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
def forward(self, input_feature):
res_pre_gru = self.pre_gru(input_feature)
res_gru = self.gru(res_pre_gru)
res_pre_gru_r = self.pre_gru_r(input_feature)
res_gru_r = self.gru_r(res_pre_gru_r)
bi_merge = fluid.layers.concat(input=[res_gru, res_gru_r], axis=-1)
return bi_merge
class LinearChainCRF(fluid.dygraph.Layer):
def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
super(LinearChainCRF, self).__init__()
self._param_attr = param_attr
self._dtype = dtype
self._size = size
self._is_test = is_test
self._transition = self.create_parameter(
attr=self._param_attr,
shape=[self._size + 2, self._size],
dtype=self._dtype)
@property
def weight(self):
return self._transition
@weight.setter
def weight(self, value):
self._transition = value
def forward(self, input, label, length=None):
alpha = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
emission_exps = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
transition_exps = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
log_likelihood = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
this_inputs = {
"Emission": [input],
"Transition": self._transition,
"Label": [label]
}
if length is not None:
this_inputs['Length'] = [length]
self._helper.append_op(
type='linear_chain_crf',
inputs=this_inputs,
outputs={
"Alpha": [alpha],
"EmissionExps": [emission_exps],
"TransitionExps": transition_exps,
"LogLikelihood": log_likelihood
},
attrs={"is_test": self._is_test, })
return log_likelihood
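# Note on the transition parameter above (added for clarity): its shape is
# [size + 2, size]; in Paddle's linear_chain_crf op the first two rows hold
# the start- and end-transition weights, and the remaining [size, size] block
# holds the tag-to-tag transition weights.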
class CRFDecoding(fluid.dygraph.Layer):
def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
super(CRFDecoding, self).__init__()
self._dtype = dtype
self._size = size
self._is_test = is_test
self._param_attr = param_attr
self._transition = self.create_parameter(
attr=self._param_attr,
shape=[self._size + 2, self._size],
dtype=self._dtype)
@property
def weight(self):
return self._transition
@weight.setter
def weight(self, value):
self._transition = value
def forward(self, input, label=None, length=None):
viterbi_path = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
this_inputs = {
"Emission": [input],
"Transition": self._transition,
"Label": label
}
if length is not None:
this_inputs['Length'] = [length]
self._helper.append_op(
type='crf_decoding',
inputs=this_inputs,
outputs={"ViterbiPath": [viterbi_path]},
attrs={"is_test": self._is_test, })
return viterbi_path
class ChunkEval(fluid.dygraph.Layer):
def __init__(self, num_chunk_types, chunk_scheme,
excluded_chunk_types=None):
super(ChunkEval, self).__init__()
self.num_chunk_types = num_chunk_types
self.chunk_scheme = chunk_scheme
self.excluded_chunk_types = excluded_chunk_types
def forward(self, input, label, seq_length=None):
precision = self._helper.create_variable_for_type_inference(
dtype="float32")
recall = self._helper.create_variable_for_type_inference(
dtype="float32")
f1_score = self._helper.create_variable_for_type_inference(
dtype="float32")
num_infer_chunks = self._helper.create_variable_for_type_inference(
dtype="int64")
num_label_chunks = self._helper.create_variable_for_type_inference(
dtype="int64")
num_correct_chunks = self._helper.create_variable_for_type_inference(
dtype="int64")
this_input = {"Inference": [input], "Label": [label]}
if seq_length is not None:
this_input["SeqLength"] = [seq_length]
self._helper.append_op(
type='chunk_eval',
inputs=this_input,
outputs={
"Precision": [precision],
"Recall": [recall],
"F1-Score": [f1_score],
"NumInferChunks": [num_infer_chunks],
"NumLabelChunks": [num_label_chunks],
"NumCorrectChunks": [num_correct_chunks]
},
attrs={
"num_chunk_types": self.num_chunk_types,
"chunk_scheme": self.chunk_scheme,
"excluded_chunk_types": self.excluded_chunk_types or []
})
return (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
num_correct_chunks)
class LexNet(fluid.dygraph.Layer):
def __init__(self, args, length=None):
        """
        Define the lexical analysis network structure.
        word: stores the input of the model
        return: the average CRF cost and the CRF decoding path
        """
        super(LexNet, self).__init__()
self.word_emb_dim = args.word_emb_dim
self.vocab_size = args.vocab_size
self.num_labels = args.num_labels
self.grnn_hidden_dim = args.grnn_hidden_dim
        self.emb_lr = args.emb_learning_rate if hasattr(
            args, 'emb_learning_rate') else 1.0
        self.crf_lr = args.crf_learning_rate if hasattr(
            args, 'crf_learning_rate') else 1.0
self.bigru_num = args.bigru_num
self.init_bound = 0.1
self.word_embedding = Embedding(
size=[self.vocab_size, self.word_emb_dim],
dtype='float32',
param_attr=fluid.ParamAttr(
learning_rate=self.emb_lr,
name="word_emb",
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound)))
h_0 = np.zeros((args.batch_size, self.grnn_hidden_dim), dtype="float32")
h_0 = to_variable(h_0)
self.bigru_units = []
for i in range(self.bigru_num):
if i == 0:
self.bigru_units.append(
self.add_sublayer(
"bigru_units%d" % i,
BiGRU(
self.grnn_hidden_dim,
self.grnn_hidden_dim,
self.init_bound,
h_0=h_0)))
else:
self.bigru_units.append(
self.add_sublayer(
"bigru_units%d" % i,
BiGRU(
self.grnn_hidden_dim * 2,
self.grnn_hidden_dim,
self.init_bound,
h_0=h_0)))
self.fc = Linear(
input_dim=self.grnn_hidden_dim * 2,
output_dim=self.num_labels,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.linear_chain_crf = LinearChainCRF(
param_attr=fluid.ParamAttr(
name='linear_chain_crfw', learning_rate=self.crf_lr),
size=self.num_labels)
self.crf_decoding = CRFDecoding(
param_attr=fluid.ParamAttr(
name='crfw', learning_rate=self.crf_lr),
size=self.num_labels)
        # Share the transition weights: decoding must use the same transition
        # matrix that the linear-chain CRF loss learns.
self.crf_decoding.weight = self.linear_chain_crf.weight
@declarative
def forward(self, word, target, length=None):
"""
Configure the network
"""
word_embed = self.word_embedding(word)
input_feature = word_embed
for i in range(self.bigru_num):
bigru_output = self.bigru_units[i](input_feature)
input_feature = bigru_output
emission = self.fc(bigru_output)
crf_cost = self.linear_chain_crf(
input=emission, label=target, length=length)
avg_cost = fluid.layers.mean(x=crf_cost)
crf_decode = self.crf_decoding(input=emission, length=length)
return avg_cost, crf_decode
class Args(object):
epoch = 1
batch_size = 4
vocab_size = 100
num_labels = 10
word_emb_dim = 128
grnn_hidden_dim = 128
base_learning_rate = 0.01
bigru_num = 2
print_steps = 1
model_save_dir = "./lac_model"
dy_param_path = "./lac_dy_param"
def get_random_input_data(batch_size, vocab_size, num_labels, max_seq_len=64):
local_random = np.random.RandomState(SEED)
padding_id = np.int64(0)
iter_num = 5
def __reader__():
batch, init_lens = [], []
for i in range(iter_num * batch_size):
cur_len = local_random.randint(3, max_seq_len)
word_ids = local_random.randint(0, vocab_size,
[cur_len]).astype('int64').tolist()
label_ids = local_random.randint(0, num_labels,
[cur_len]).astype('int64').tolist()
batch.append((word_ids, label_ids))
init_lens.append(cur_len)
if len(batch) == batch_size:
batch_max_len = min(max(init_lens), max_seq_len)
new_batch = []
for words_len, (word_ids, label_ids) in zip(init_lens, batch):
word_ids = word_ids[0:batch_max_len]
words_len = np.int64(len(word_ids))
word_ids += [
padding_id for _ in range(batch_max_len - words_len)
]
label_ids = label_ids[0:batch_max_len]
label_ids += [
padding_id for _ in range(batch_max_len - words_len)
]
assert len(word_ids) == len(label_ids)
new_batch.append((word_ids, label_ids, words_len))
yield new_batch
batch, init_lens = [], []
return __reader__
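# Each yield from __reader__ is a list of batch_size tuples
# (word_ids, label_ids, words_len): word_ids and label_ids are padded with 0
# up to the longest (truncated) sequence in the batch, and words_len is the
# unpadded length, e.g. ([5, 17, 3, 0, 0], [2, 9, 1, 0, 0], 3) with
# illustrative values.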
def create_dataloader(reader, place):
data_loader = fluid.io.DataLoader.from_generator(
capacity=16, use_double_buffer=True, iterable=True)
data_loader.set_sample_list_generator(reader, places=place)
return data_loader
def do_train(args, to_static):
program_translator.enable(to_static)
place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda(
) else fluid.CPUPlace()
with fluid.dygraph.guard(place):
fluid.default_startup_program().random_seed = SEED
fluid.default_main_program().random_seed = SEED
reader = get_random_input_data(args.batch_size, args.vocab_size,
args.num_labels)
train_loader = create_dataloader(reader, place)
model = LexNet(args)
optimizer = fluid.optimizer.AdamOptimizer(
learning_rate=args.base_learning_rate,
parameter_list=model.parameters())
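        # Under the IOB scheme each chunk type uses a B- and an I- tag plus a
        # single shared O tag, so the number of chunk types is
        # (num_labels - 1) / 2, rounded up.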
chunk_eval = ChunkEval(
int(math.ceil((args.num_labels - 1) / 2.0)), "IOB")
step = 0
chunk_evaluator = fluid.metrics.ChunkEvaluator()
chunk_evaluator.reset()
loss_data = []
for epoch_id in range(args.epoch):
for batch in train_loader():
words, targets, length = batch
start_time = time.time()
avg_cost, crf_decode = model(words, targets, length)
loss_data.append(avg_cost.numpy()[0])
# backward and optimization
avg_cost.backward()
optimizer.minimize(avg_cost)
model.clear_gradients()
end_time = time.time()
if step % args.print_steps == 0:
(precision, recall, f1_score, num_infer_chunks,
num_label_chunks, num_correct_chunks) = chunk_eval(
input=crf_decode, label=targets, seq_length=length)
outputs = [avg_cost, precision, recall, f1_score]
avg_cost, precision, recall, f1_score = [
np.mean(x.numpy()) for x in outputs
]
print(
"[train] step = %d, loss = %f, P: %f, R: %f, F1: %f, elapsed time %f"
% (step, avg_cost, precision, recall, f1_score,
end_time - start_time))
step += 1
# save inference model
if to_static:
program_translator.save_inference_model(
dirname=args.model_save_dir, feed=[0, 2], fetch=[1])
else:
fluid.dygraph.save_dygraph(model.state_dict(), args.dy_param_path)
return np.array(loss_data)
class TestLACModel(unittest.TestCase):
def setUp(self):
self.args = Args()
self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda(
) else fluid.CPUPlace()
def train(self, to_static):
out = do_train(self.args, to_static)
return out
def test_train(self):
dy_out = self.train(to_static=False)
st_out = self.train(to_static=True)
self.assertTrue(
np.allclose(dy_out, st_out),
msg="dygraph output:\n{},\nstatic output:\n {}.".format(dy_out,
st_out))
        # Prediction needs trained models, so `verify_predict` is called at
        # the end of `test_train`.
self.verify_predict()
def verify_predict(self):
reader = get_random_input_data(
self.args.batch_size, self.args.vocab_size, self.args.num_labels)
for batch in reader():
batch = [np.vstack(var) for var in zip(*batch)]
dy_pre = self.predict_dygraph(batch)
st_pre = self.predict_static(batch)
self.assertTrue(
np.allclose(dy_pre, st_pre),
msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre))
def predict_dygraph(self, batch):
words, targets, length = batch
program_translator.enable(False)
with fluid.dygraph.guard(self.place):
model = LexNet(self.args)
# load dygraph trained parameters
model_dict, _ = fluid.load_dygraph(self.args.dy_param_path +
".pdparams")
model.set_dict(model_dict)
model.eval()
_, pred_res = model(
to_variable(words), to_variable(targets), to_variable(length))
return pred_res.numpy()
def predict_static(self, batch):
"""
LAC model contains h_0 created in `__init__` that is necessary for inferring.
Load inference model to test it's ok for prediction.
"""
exe = fluid.Executor(self.place)
# load inference model
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(
self.args.model_save_dir, executor=exe)
words, targets, length = batch
pred_res = exe.run(
inference_program,
feed={feed_target_names[0]: words,
feed_target_names[1]: length},
fetch_list=fetch_targets)
return pred_res[0]
if __name__ == "__main__":
unittest.main()
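End of test_lac.py. The second file in this commit, the sentiment-analysis unittest, follows; it reuses `DynamicGRU` from test_lac.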
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import unittest
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Linear, Embedding
from paddle.fluid.dygraph import to_variable, ProgramTranslator, declarative
from test_lac import DynamicGRU
SEED = 2020
program_translator = ProgramTranslator()
# Note: set FLAGS_cudnn_deterministic to True to eliminate randomness:
# for a single operation, cuDNN provides several algorithms, and some of
# them produce non-deterministic results (e.g. convolution algorithms).
if fluid.is_compiled_with_cuda():
fluid.set_flags({'FLAGS_cudnn_deterministic': True})
class SimpleConvPool(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
use_cudnn=True,
batch_size=None):
super(SimpleConvPool, self).__init__()
self.batch_size = batch_size
self._conv2d = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
padding=[1, 1],
use_cudnn=use_cudnn,
act='tanh')
def forward(self, inputs):
x = self._conv2d(inputs)
x = fluid.layers.reduce_max(x, dim=-1)
x = fluid.layers.reshape(x, shape=[self.batch_size, -1])
return x
class CNN(fluid.dygraph.Layer):
def __init__(self, dict_dim, batch_size, seq_len):
super(CNN, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
self.hid_dim = 128
self.fc_hid_dim = 96
self.class_dim = 2
self.channels = 1
self.win_size = [3, self.hid_dim]
self.batch_size = batch_size
self.seq_len = seq_len
self.embedding = Embedding(
size=[self.dict_dim + 1, self.emb_dim],
dtype='float32',
is_sparse=False)
self._simple_conv_pool_1 = SimpleConvPool(
self.channels,
self.hid_dim,
self.win_size,
batch_size=self.batch_size)
self._fc1 = Linear(
input_dim=self.hid_dim * self.seq_len,
output_dim=self.fc_hid_dim,
act="softmax")
self._fc_prediction = Linear(
input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax")
@declarative
def forward(self, inputs, label=None):
emb = self.embedding(inputs)
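        # Zero out embeddings of padding tokens: ids equal to dict_dim mark
        # padding (the data reader pads with vocab_size, and the embedding
        # table has dict_dim + 1 rows), so the mask is 0 there and 1
        # elsewhere, broadcast across the embedding dimension.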
o_np_mask = (
fluid.layers.reshape(inputs, [-1, 1]) != self.dict_dim).astype(
dtype='float32')
mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim])
emb = emb * mask_emb
emb = fluid.layers.reshape(
emb, shape=[-1, self.channels, self.seq_len, self.hid_dim])
conv_3 = self._simple_conv_pool_1(emb)
fc_1 = self._fc1(conv_3)
prediction = self._fc_prediction(fc_1)
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, prediction, acc
class BOW(fluid.dygraph.Layer):
def __init__(self, dict_dim, batch_size, seq_len):
super(BOW, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
self.hid_dim = 128
self.fc_hid_dim = 96
self.class_dim = 2
self.batch_size = batch_size
self.seq_len = seq_len
self.embedding = Embedding(
size=[self.dict_dim + 1, self.emb_dim],
dtype='float32',
is_sparse=False)
self._fc1 = Linear(
input_dim=self.hid_dim, output_dim=self.hid_dim, act="tanh")
self._fc2 = Linear(
input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(
input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax")
@declarative
def forward(self, inputs, label=None):
emb = self.embedding(inputs)
o_np_mask = (
fluid.layers.reshape(inputs, [-1, 1]) != self.dict_dim).astype(
dtype='float32')
mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim])
emb = emb * mask_emb
emb = fluid.layers.reshape(emb, shape=[-1, self.seq_len, self.hid_dim])
bow_1 = fluid.layers.reduce_sum(emb, dim=1)
bow_1 = fluid.layers.tanh(bow_1)
fc_1 = self._fc1(bow_1)
fc_2 = self._fc2(fc_1)
prediction = self._fc_prediction(fc_2)
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, prediction, acc
class GRU(fluid.dygraph.Layer):
def __init__(self, dict_dim, batch_size, seq_len):
super(GRU, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
self.hid_dim = 128
self.fc_hid_dim = 96
self.class_dim = 2
self.batch_size = batch_size
self.seq_len = seq_len
self.embedding = Embedding(
size=[self.dict_dim + 1, self.emb_dim],
dtype='float32',
param_attr=fluid.ParamAttr(learning_rate=30),
is_sparse=False)
h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
h_0 = to_variable(h_0)
self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3)
self._fc2 = Linear(
input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(
input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax")
self._gru = DynamicGRU(size=self.hid_dim, h_0=h_0)
@declarative
def forward(self, inputs, label=None):
emb = self.embedding(inputs)
o_np_mask = (fluid.layers.reshape(inputs, [-1, 1]) != self.dict_dim
).astype('float32')
mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim])
emb = emb * mask_emb
emb = fluid.layers.reshape(
emb, shape=[self.batch_size, -1, self.hid_dim])
fc_1 = self._fc1(emb)
gru_hidden = self._gru(fc_1)
gru_hidden = fluid.layers.reduce_max(gru_hidden, dim=1)
tanh_1 = fluid.layers.tanh(gru_hidden)
fc_2 = self._fc2(tanh_1)
prediction = self._fc_prediction(fc_2)
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, prediction, acc
class BiGRU(fluid.dygraph.Layer):
def __init__(self, dict_dim, batch_size, seq_len):
super(BiGRU, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
self.hid_dim = 128
self.fc_hid_dim = 96
self.class_dim = 2
self.batch_size = batch_size
self.seq_len = seq_len
self.embedding = Embedding(
size=[self.dict_dim + 1, self.emb_dim],
dtype='float32',
param_attr=fluid.ParamAttr(learning_rate=30),
is_sparse=False)
h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
h_0 = to_variable(h_0)
self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3)
self._fc2 = Linear(
input_dim=self.hid_dim * 2, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(
input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax")
self._gru_forward = DynamicGRU(
size=self.hid_dim, h_0=h_0, is_reverse=False)
self._gru_backward = DynamicGRU(
size=self.hid_dim, h_0=h_0, is_reverse=True)
@declarative
def forward(self, inputs, label=None):
emb = self.embedding(inputs)
o_np_mask = (fluid.layers.reshape(inputs, [-1, 1]) != self.dict_dim
).astype('float32')
mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim])
emb = emb * mask_emb
emb = fluid.layers.reshape(
emb, shape=[self.batch_size, -1, self.hid_dim])
fc_1 = self._fc1(emb)
gru_forward = self._gru_forward(fc_1)
gru_backward = self._gru_backward(fc_1)
gru_forward_tanh = fluid.layers.tanh(gru_forward)
gru_backward_tanh = fluid.layers.tanh(gru_backward)
encoded_vector = fluid.layers.concat(
input=[gru_forward_tanh, gru_backward_tanh], axis=2)
encoded_vector = fluid.layers.reduce_max(encoded_vector, dim=1)
fc_2 = self._fc2(encoded_vector)
prediction = self._fc_prediction(fc_2)
        # TODO(Aurelius84): Uncomment the following code once returning
        # variable-length vars is supported.
# if label is not None:
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, prediction, acc
# else:
# return prediction
def fake_data_reader(class_num, vocab_size, batch_size, padding_size):
def reader():
batch_data = []
while True:
label = np.random.randint(0, class_num)
seq_len = np.random.randint(padding_size // 2,
int(padding_size * 1.2))
word_ids = np.random.randint(0, vocab_size, [seq_len]).tolist()
word_ids = word_ids[:padding_size] + [vocab_size] * (padding_size -
seq_len)
batch_data.append((word_ids, [label], seq_len))
if len(batch_data) == batch_size:
yield batch_data
batch_data = []
return reader
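# Each yielded batch is a list of batch_size tuples (word_ids, [label],
# seq_len): word_ids has exactly padding_size entries, padded (or truncated)
# using the out-of-vocabulary id `vocab_size`, while seq_len keeps the
# originally sampled length.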
class Args(object):
epoch = 1
batch_size = 4
class_num = 2
lr = 0.01
vocab_size = 1000
padding_size = 50
log_step = 2
train_step = 10
def train(args, to_static):
program_translator.enable(to_static)
place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
else fluid.CPUPlace()
with fluid.dygraph.guard(place):
np.random.seed(SEED)
fluid.default_startup_program().random_seed = SEED
fluid.default_main_program().random_seed = SEED
train_reader = fake_data_reader(args.class_num, args.vocab_size,
args.batch_size, args.padding_size)
train_loader = fluid.io.DataLoader.from_generator(capacity=24)
train_loader.set_sample_list_generator(train_reader)
if args.model_type == 'cnn_net':
model = CNN(args.vocab_size, args.batch_size, args.padding_size)
elif args.model_type == 'bow_net':
model = BOW(args.vocab_size, args.batch_size, args.padding_size)
elif args.model_type == 'gru_net':
model = GRU(args.vocab_size, args.batch_size, args.padding_size)
elif args.model_type == 'bigru_net':
model = BiGRU(args.vocab_size, args.batch_size, args.padding_size)
sgd_optimizer = fluid.optimizer.Adagrad(
learning_rate=args.lr, parameter_list=model.parameters())
loss_data = []
for eop in range(args.epoch):
time_begin = time.time()
for batch_id, data in enumerate(train_loader()):
word_ids, labels, seq_lens = data
doc = to_variable(word_ids.numpy().reshape(-1)).astype('int64')
label = labels.astype('int64')
model.train()
avg_cost, prediction, acc = model(doc, label)
loss_data.append(avg_cost.numpy()[0])
avg_cost.backward()
sgd_optimizer.minimize(avg_cost)
model.clear_gradients()
if batch_id % args.log_step == 0:
time_end = time.time()
used_time = time_end - time_begin
print("step: %d, ave loss: %f, speed: %f steps/s" %
(batch_id, avg_cost.numpy()[0],
args.log_step / used_time))
time_begin = time.time()
if batch_id == args.train_step:
break
return loss_data
class TestSentiment(unittest.TestCase):
def setUp(self):
self.args = Args()
def train_model(self, model_type='cnn_net'):
self.args.model_type = model_type
st_out = train(self.args, True)
dy_out = train(self.args, False)
self.assertTrue(
np.allclose(dy_out, st_out),
msg="dy_out:\n {}\n st_out:\n {}".format(dy_out, st_out))
def test_train(self):
model_types = ['cnn_net', 'bow_net', 'gru_net', 'bigru_net']
for model_type in model_types:
print('training %s ....' % model_type)
self.train_model(model_type)
if __name__ == '__main__':
unittest.main()