diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py
new file mode 100644
index 0000000000000000000000000000000000000000..473a919ad90d05f034df927b2772cba6492d5855
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py
@@ -0,0 +1,573 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import math
+import time
+import numpy as np
+import unittest
+
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = "2"
+
+import paddle.fluid as fluid
+from paddle.fluid.dygraph import to_variable
+from paddle.fluid.dygraph import Embedding, Linear, GRUUnit
+from paddle.fluid.dygraph import declarative, ProgramTranslator
+
+SEED = 2020
+
+program_translator = ProgramTranslator()
+
+
+class DynamicGRU(fluid.dygraph.Layer):
+    def __init__(self,
+                 size,
+                 h_0=None,
+                 param_attr=None,
+                 bias_attr=None,
+                 is_reverse=False,
+                 gate_activation='sigmoid',
+                 candidate_activation='tanh',
+                 origin_mode=False,
+                 init_size=None):
+        super(DynamicGRU, self).__init__()
+
+        self.gru_unit = GRUUnit(
+            size * 3,
+            param_attr=param_attr,
+            bias_attr=bias_attr,
+            activation=candidate_activation,
+            gate_activation=gate_activation,
+            origin_mode=origin_mode)
+
+        self.size = size
+        self.h_0 = h_0
+        self.is_reverse = is_reverse
+
+    def forward(self, inputs):
+        # Use `to_variable` to create a copy of the global h_0, which is
+        # created outside `DynamicGRU`, to avoid modifying it in place:
+        # `h_0` is shared by the other `DynamicGRU` layers as well.
+        hidden = to_variable(self.h_0)
+        hidden.stop_gradient = True
+
+        res = []
+        for i in range(inputs.shape[1]):
+            if self.is_reverse:
+                j = fluid.layers.shape(inputs)[1] - 1 - i
+            else:
+                # TODO(Aurelius84): In a while block, if a variable created in
+                # the parent block participates in the gradient calculation,
+                # the gradient result is incorrect because each step scope
+                # always returns the same value generated by the last step.
+                # Here we add 0 so that `j` is created inside the while block
+                # to avoid this bug; a proper fix is planned for the next PR.
+                j = i + 0
+            # FIXME(Aurelius84): see the explanation above.
+            hidden = fluid.layers.scale(hidden, 1)
+
+            # See the explanation above.
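+            # In the reverse case `j` is a Variable, so an explicit
+            # `fluid.layers.slice` call is used here instead of the original
+            # Python slicing (kept commented below for reference).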
+ # input_ = inputs[:, i:i+1, :] # original code + input_ = fluid.layers.slice( + inputs, axes=[1], starts=[j], ends=[j + 1]) + input_ = fluid.layers.reshape( + input_, [-1, input_.shape[2]], inplace=False) + hidden, reset, gate = self.gru_unit(input_, hidden) + hidden_ = fluid.layers.reshape( + hidden, [-1, 1, hidden.shape[1]], inplace=False) + res.append(hidden_) + + if self.is_reverse: + res = res[::-1] + res = fluid.layers.concat(res, axis=1) + return res + + +class BiGRU(fluid.dygraph.Layer): + def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None): + super(BiGRU, self).__init__() + + self.pre_gru = Linear( + input_dim=input_dim, + output_dim=grnn_hidden_dim * 3, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform( + low=-init_bound, high=init_bound), + regularizer=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=1e-4))) + + self.gru = DynamicGRU( + size=grnn_hidden_dim, + h_0=h_0, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform( + low=-init_bound, high=init_bound), + regularizer=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=1e-4))) + + self.pre_gru_r = Linear( + input_dim=input_dim, + output_dim=grnn_hidden_dim * 3, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform( + low=-init_bound, high=init_bound), + regularizer=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=1e-4))) + + self.gru_r = DynamicGRU( + size=grnn_hidden_dim, + is_reverse=True, + h_0=h_0, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform( + low=-init_bound, high=init_bound), + regularizer=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=1e-4))) + + def forward(self, input_feature): + res_pre_gru = self.pre_gru(input_feature) + res_gru = self.gru(res_pre_gru) + + res_pre_gru_r = self.pre_gru_r(input_feature) + res_gru_r = self.gru_r(res_pre_gru_r) + + bi_merge = fluid.layers.concat(input=[res_gru, res_gru_r], axis=-1) + return bi_merge + + +class LinearChainCRF(fluid.dygraph.Layer): + def __init__(self, param_attr, size=None, is_test=False, dtype='float32'): + super(LinearChainCRF, self).__init__() + + self._param_attr = param_attr + self._dtype = dtype + self._size = size + self._is_test = is_test + self._transition = self.create_parameter( + attr=self._param_attr, + shape=[self._size + 2, self._size], + dtype=self._dtype) + + @property + def weight(self): + return self._transition + + @weight.setter + def weight(self, value): + self._transition = value + + def forward(self, input, label, length=None): + + alpha = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + emission_exps = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + transition_exps = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + log_likelihood = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + this_inputs = { + "Emission": [input], + "Transition": self._transition, + "Label": [label] + } + if length is not None: + this_inputs['Length'] = [length] + self._helper.append_op( + type='linear_chain_crf', + inputs=this_inputs, + outputs={ + "Alpha": [alpha], + "EmissionExps": [emission_exps], + "TransitionExps": transition_exps, + "LogLikelihood": log_likelihood + }, + attrs={"is_test": self._is_test, }) + return log_likelihood + + +class CRFDecoding(fluid.dygraph.Layer): + def __init__(self, param_attr, size=None, is_test=False, dtype='float32'): + super(CRFDecoding, self).__init__() + + self._dtype = dtype + self._size = size + 
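+        # NOTE: as in `LinearChainCRF`, the transition parameter created
+        # below has shape [size + 2, size]; the two extra rows hold the
+        # start and end transition weights used by the CRF ops.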
self._is_test = is_test + self._param_attr = param_attr + self._transition = self.create_parameter( + attr=self._param_attr, + shape=[self._size + 2, self._size], + dtype=self._dtype) + + @property + def weight(self): + return self._transition + + @weight.setter + def weight(self, value): + self._transition = value + + def forward(self, input, label=None, length=None): + + viterbi_path = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + this_inputs = { + "Emission": [input], + "Transition": self._transition, + "Label": label + } + if length is not None: + this_inputs['Length'] = [length] + self._helper.append_op( + type='crf_decoding', + inputs=this_inputs, + outputs={"ViterbiPath": [viterbi_path]}, + attrs={"is_test": self._is_test, }) + return viterbi_path + + +class ChunkEval(fluid.dygraph.Layer): + def __init__(self, num_chunk_types, chunk_scheme, + excluded_chunk_types=None): + super(ChunkEval, self).__init__() + self.num_chunk_types = num_chunk_types + self.chunk_scheme = chunk_scheme + self.excluded_chunk_types = excluded_chunk_types + + def forward(self, input, label, seq_length=None): + + precision = self._helper.create_variable_for_type_inference( + dtype="float32") + recall = self._helper.create_variable_for_type_inference( + dtype="float32") + f1_score = self._helper.create_variable_for_type_inference( + dtype="float32") + num_infer_chunks = self._helper.create_variable_for_type_inference( + dtype="int64") + num_label_chunks = self._helper.create_variable_for_type_inference( + dtype="int64") + num_correct_chunks = self._helper.create_variable_for_type_inference( + dtype="int64") + + this_input = {"Inference": [input], "Label": [label]} + if seq_length is not None: + this_input["SeqLength"] = [seq_length] + + self._helper.append_op( + type='chunk_eval', + inputs=this_input, + outputs={ + "Precision": [precision], + "Recall": [recall], + "F1-Score": [f1_score], + "NumInferChunks": [num_infer_chunks], + "NumLabelChunks": [num_label_chunks], + "NumCorrectChunks": [num_correct_chunks] + }, + attrs={ + "num_chunk_types": self.num_chunk_types, + "chunk_scheme": self.chunk_scheme, + "excluded_chunk_types": self.excluded_chunk_types or [] + }) + return (precision, recall, f1_score, num_infer_chunks, num_label_chunks, + num_correct_chunks) + + +class LexNet(fluid.dygraph.Layer): + def __init__(self, args, length=None): + super(LexNet, self).__init__() + """ + define the lexical analysis network structure + word: stores the input of the model + for_infer: a boolean value, indicating if the model to be created is for training or predicting. 
+
+        return:
+            for infer: the prediction
+            otherwise: the average CRF cost and the prediction
+        """
+        self.word_emb_dim = args.word_emb_dim
+        self.vocab_size = args.vocab_size
+        self.num_labels = args.num_labels
+        self.grnn_hidden_dim = args.grnn_hidden_dim
+        self.emb_lr = args.emb_learning_rate if 'emb_learning_rate' in dir(
+            args) else 1.0
+        self.crf_lr = args.crf_learning_rate if 'crf_learning_rate' in dir(
+            args) else 1.0
+        self.bigru_num = args.bigru_num
+        self.init_bound = 0.1
+
+        self.word_embedding = Embedding(
+            size=[self.vocab_size, self.word_emb_dim],
+            dtype='float32',
+            param_attr=fluid.ParamAttr(
+                learning_rate=self.emb_lr,
+                name="word_emb",
+                initializer=fluid.initializer.Uniform(
+                    low=-self.init_bound, high=self.init_bound)))
+
+        h_0 = np.zeros((args.batch_size, self.grnn_hidden_dim), dtype="float32")
+        h_0 = to_variable(h_0)
+
+        self.bigru_units = []
+        for i in range(self.bigru_num):
+            if i == 0:
+                self.bigru_units.append(
+                    self.add_sublayer(
+                        "bigru_units%d" % i,
+                        BiGRU(
+                            self.grnn_hidden_dim,
+                            self.grnn_hidden_dim,
+                            self.init_bound,
+                            h_0=h_0)))
+            else:
+                self.bigru_units.append(
+                    self.add_sublayer(
+                        "bigru_units%d" % i,
+                        BiGRU(
+                            self.grnn_hidden_dim * 2,
+                            self.grnn_hidden_dim,
+                            self.init_bound,
+                            h_0=h_0)))
+
+        self.fc = Linear(
+            input_dim=self.grnn_hidden_dim * 2,
+            output_dim=self.num_labels,
+            param_attr=fluid.ParamAttr(
+                initializer=fluid.initializer.Uniform(
+                    low=-self.init_bound, high=self.init_bound),
+                regularizer=fluid.regularizer.L2DecayRegularizer(
+                    regularization_coeff=1e-4)))
+
+        self.linear_chain_crf = LinearChainCRF(
+            param_attr=fluid.ParamAttr(
+                name='linear_chain_crfw', learning_rate=self.crf_lr),
+            size=self.num_labels)
+
+        self.crf_decoding = CRFDecoding(
+            param_attr=fluid.ParamAttr(
+                name='crfw', learning_rate=self.crf_lr),
+            size=self.num_labels)
+        # Share the transition weight between training and decoding.
+        self.crf_decoding.weight = self.linear_chain_crf.weight
+
+    @declarative
+    def forward(self, word, target, length=None):
+        """
+        Configure the network
+        """
+        word_embed = self.word_embedding(word)
+        input_feature = word_embed
+
+        for i in range(self.bigru_num):
+            bigru_output = self.bigru_units[i](input_feature)
+            input_feature = bigru_output
+
+        emission = self.fc(bigru_output)
+
+        crf_cost = self.linear_chain_crf(
+            input=emission, label=target, length=length)
+        avg_cost = fluid.layers.mean(x=crf_cost)
+        crf_decode = self.crf_decoding(input=emission, length=length)
+        return avg_cost, crf_decode
+
+
+class Args(object):
+    epoch = 1
+    batch_size = 4
+    vocab_size = 100
+    num_labels = 10
+    word_emb_dim = 128
+    grnn_hidden_dim = 128
+    base_learning_rate = 0.01
+    bigru_num = 2
+    print_steps = 1
+    model_save_dir = "./lac_model"
+    dy_param_path = "./lac_dy_param"
+
+
+def get_random_input_data(batch_size, vocab_size, num_labels, max_seq_len=64):
+    local_random = np.random.RandomState(SEED)
+    padding_id = np.int64(0)
+    iter_num = 5
+
+    def __reader__():
+        batch, init_lens = [], []
+        for i in range(iter_num * batch_size):
+            cur_len = local_random.randint(3, max_seq_len)
+            word_ids = local_random.randint(0, vocab_size,
+                                            [cur_len]).astype('int64').tolist()
+            label_ids = local_random.randint(0, num_labels,
+                                             [cur_len]).astype('int64').tolist()
+            batch.append((word_ids, label_ids))
+            init_lens.append(cur_len)
+            if len(batch) == batch_size:
+                batch_max_len = min(max(init_lens), max_seq_len)
+                new_batch = []
+                for words_len, (word_ids, label_ids) in zip(init_lens, batch):
+                    word_ids = word_ids[0:batch_max_len]
+                    words_len = np.int64(len(word_ids))
+                    word_ids += [
+                        padding_id for _ in
range(batch_max_len - words_len) + ] + label_ids = label_ids[0:batch_max_len] + label_ids += [ + padding_id for _ in range(batch_max_len - words_len) + ] + assert len(word_ids) == len(label_ids) + new_batch.append((word_ids, label_ids, words_len)) + yield new_batch + batch, init_lens = [], [] + + return __reader__ + + +def create_dataloader(reader, place): + data_loader = fluid.io.DataLoader.from_generator( + capacity=16, use_double_buffer=True, iterable=True) + + data_loader.set_sample_list_generator(reader, places=place) + + return data_loader + + +def do_train(args, to_static): + program_translator.enable(to_static) + place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.dygraph.guard(place): + fluid.default_startup_program().random_seed = SEED + fluid.default_main_program().random_seed = SEED + + reader = get_random_input_data(args.batch_size, args.vocab_size, + args.num_labels) + train_loader = create_dataloader(reader, place) + + model = LexNet(args) + optimizer = fluid.optimizer.AdamOptimizer( + learning_rate=args.base_learning_rate, + parameter_list=model.parameters()) + chunk_eval = ChunkEval( + int(math.ceil((args.num_labels - 1) / 2.0)), "IOB") + + step = 0 + chunk_evaluator = fluid.metrics.ChunkEvaluator() + chunk_evaluator.reset() + + loss_data = [] + for epoch_id in range(args.epoch): + for batch in train_loader(): + words, targets, length = batch + start_time = time.time() + avg_cost, crf_decode = model(words, targets, length) + loss_data.append(avg_cost.numpy()[0]) + + # backward and optimization + avg_cost.backward() + optimizer.minimize(avg_cost) + model.clear_gradients() + end_time = time.time() + + if step % args.print_steps == 0: + (precision, recall, f1_score, num_infer_chunks, + num_label_chunks, num_correct_chunks) = chunk_eval( + input=crf_decode, label=targets, seq_length=length) + outputs = [avg_cost, precision, recall, f1_score] + avg_cost, precision, recall, f1_score = [ + np.mean(x.numpy()) for x in outputs + ] + + print( + "[train] step = %d, loss = %f, P: %f, R: %f, F1: %f, elapsed time %f" + % (step, avg_cost, precision, recall, f1_score, + end_time - start_time)) + + step += 1 + # save inference model + if to_static: + program_translator.save_inference_model( + dirname=args.model_save_dir, feed=[0, 2], fetch=[1]) + else: + fluid.dygraph.save_dygraph(model.state_dict(), args.dy_param_path) + + return np.array(loss_data) + + +class TestLACModel(unittest.TestCase): + def setUp(self): + self.args = Args() + self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( + ) else fluid.CPUPlace() + + def train(self, to_static): + out = do_train(self.args, to_static) + return out + + def test_train(self): + dy_out = self.train(to_static=False) + st_out = self.train(to_static=True) + self.assertTrue( + np.allclose(dy_out, st_out), + msg="dygraph output:\n{},\nstatic output:\n {}.".format(dy_out, + st_out)) + # Prediction needs trained models, so put `test_predict` at last of `test_train` + self.verify_predict() + + def verify_predict(self): + reader = get_random_input_data( + self.args.batch_size, self.args.vocab_size, self.args.num_labels) + for batch in reader(): + batch = [np.vstack(var) for var in zip(*batch)] + dy_pre = self.predict_dygraph(batch) + st_pre = self.predict_static(batch) + self.assertTrue( + np.allclose(dy_pre, st_pre), + msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre)) + + def predict_dygraph(self, batch): + words, targets, length = batch + program_translator.enable(False) + with 
fluid.dygraph.guard(self.place):
+            model = LexNet(self.args)
+            # load dygraph trained parameters
+            model_dict, _ = fluid.load_dygraph(self.args.dy_param_path +
+                                               ".pdparams")
+            model.set_dict(model_dict)
+            model.eval()
+
+            _, pred_res = model(
+                to_variable(words), to_variable(targets), to_variable(length))
+
+            return pred_res.numpy()
+
+    def predict_static(self, batch):
+        """
+        The LAC model contains an `h_0` created in `__init__` that is
+        necessary for inference. Load the saved inference model to verify
+        that it works for prediction.
+        """
+        exe = fluid.Executor(self.place)
+        # load inference model
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(
+             self.args.model_save_dir, executor=exe)
+
+        words, targets, length = batch
+        pred_res = exe.run(
+            inference_program,
+            feed={feed_target_names[0]: words,
+                  feed_target_names[1]: length},
+            fetch_list=fetch_targets)
+        return pred_res[0]
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d73ff14057095ca686e62228870006514093cec
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py
@@ -0,0 +1,355 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import time
+import unittest
+import numpy as np
+
+import paddle.fluid as fluid
+from paddle.fluid.dygraph.nn import Conv2D, Linear, Embedding
+from paddle.fluid.dygraph import to_variable, ProgramTranslator, declarative
+
+from test_lac import DynamicGRU
+
+SEED = 2020
+program_translator = ProgramTranslator()
+
+# Note: set FLAGS_cudnn_deterministic to True to eliminate randomness:
+#     for one operation, cuDNN has several algorithms, and some of those
+#     algorithms give non-deterministic results, e.g. the convolution
+#     algorithms.
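+#     Making cuDNN deterministic lets the dygraph and static-graph losses
+#     be compared reliably with `np.allclose` in the tests below.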
+if fluid.is_compiled_with_cuda(): + fluid.set_flags({'FLAGS_cudnn_deterministic': True}) + + +class SimpleConvPool(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + use_cudnn=True, + batch_size=None): + super(SimpleConvPool, self).__init__() + self.batch_size = batch_size + self._conv2d = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + padding=[1, 1], + use_cudnn=use_cudnn, + act='tanh') + + def forward(self, inputs): + x = self._conv2d(inputs) + x = fluid.layers.reduce_max(x, dim=-1) + x = fluid.layers.reshape(x, shape=[self.batch_size, -1]) + return x + + +class CNN(fluid.dygraph.Layer): + def __init__(self, dict_dim, batch_size, seq_len): + super(CNN, self).__init__() + self.dict_dim = dict_dim + self.emb_dim = 128 + self.hid_dim = 128 + self.fc_hid_dim = 96 + self.class_dim = 2 + self.channels = 1 + self.win_size = [3, self.hid_dim] + self.batch_size = batch_size + self.seq_len = seq_len + self.embedding = Embedding( + size=[self.dict_dim + 1, self.emb_dim], + dtype='float32', + is_sparse=False) + self._simple_conv_pool_1 = SimpleConvPool( + self.channels, + self.hid_dim, + self.win_size, + batch_size=self.batch_size) + self._fc1 = Linear( + input_dim=self.hid_dim * self.seq_len, + output_dim=self.fc_hid_dim, + act="softmax") + self._fc_prediction = Linear( + input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax") + + @declarative + def forward(self, inputs, label=None): + emb = self.embedding(inputs) + o_np_mask = ( + fluid.layers.reshape(inputs, [-1, 1]) != self.dict_dim).astype( + dtype='float32') + mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim]) + emb = emb * mask_emb + emb = fluid.layers.reshape( + emb, shape=[-1, self.channels, self.seq_len, self.hid_dim]) + conv_3 = self._simple_conv_pool_1(emb) + fc_1 = self._fc1(conv_3) + prediction = self._fc_prediction(fc_1) + + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + acc = fluid.layers.accuracy(input=prediction, label=label) + return avg_cost, prediction, acc + + +class BOW(fluid.dygraph.Layer): + def __init__(self, dict_dim, batch_size, seq_len): + super(BOW, self).__init__() + self.dict_dim = dict_dim + self.emb_dim = 128 + self.hid_dim = 128 + self.fc_hid_dim = 96 + self.class_dim = 2 + self.batch_size = batch_size + self.seq_len = seq_len + self.embedding = Embedding( + size=[self.dict_dim + 1, self.emb_dim], + dtype='float32', + is_sparse=False) + self._fc1 = Linear( + input_dim=self.hid_dim, output_dim=self.hid_dim, act="tanh") + self._fc2 = Linear( + input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh") + self._fc_prediction = Linear( + input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax") + + @declarative + def forward(self, inputs, label=None): + emb = self.embedding(inputs) + o_np_mask = ( + fluid.layers.reshape(inputs, [-1, 1]) != self.dict_dim).astype( + dtype='float32') + mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim]) + emb = emb * mask_emb + emb = fluid.layers.reshape(emb, shape=[-1, self.seq_len, self.hid_dim]) + bow_1 = fluid.layers.reduce_sum(emb, dim=1) + bow_1 = fluid.layers.tanh(bow_1) + fc_1 = self._fc1(bow_1) + fc_2 = self._fc2(fc_1) + prediction = self._fc_prediction(fc_2) + + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + acc = fluid.layers.accuracy(input=prediction, label=label) + return avg_cost, prediction, acc + + +class 
GRU(fluid.dygraph.Layer): + def __init__(self, dict_dim, batch_size, seq_len): + super(GRU, self).__init__() + self.dict_dim = dict_dim + self.emb_dim = 128 + self.hid_dim = 128 + self.fc_hid_dim = 96 + self.class_dim = 2 + self.batch_size = batch_size + self.seq_len = seq_len + self.embedding = Embedding( + size=[self.dict_dim + 1, self.emb_dim], + dtype='float32', + param_attr=fluid.ParamAttr(learning_rate=30), + is_sparse=False) + h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32") + h_0 = to_variable(h_0) + self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3) + self._fc2 = Linear( + input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh") + self._fc_prediction = Linear( + input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax") + self._gru = DynamicGRU(size=self.hid_dim, h_0=h_0) + + @declarative + def forward(self, inputs, label=None): + emb = self.embedding(inputs) + o_np_mask = (fluid.layers.reshape(inputs, [-1, 1]) != self.dict_dim + ).astype('float32') + mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim]) + emb = emb * mask_emb + emb = fluid.layers.reshape( + emb, shape=[self.batch_size, -1, self.hid_dim]) + fc_1 = self._fc1(emb) + gru_hidden = self._gru(fc_1) + gru_hidden = fluid.layers.reduce_max(gru_hidden, dim=1) + tanh_1 = fluid.layers.tanh(gru_hidden) + fc_2 = self._fc2(tanh_1) + prediction = self._fc_prediction(fc_2) + + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + acc = fluid.layers.accuracy(input=prediction, label=label) + return avg_cost, prediction, acc + + +class BiGRU(fluid.dygraph.Layer): + def __init__(self, dict_dim, batch_size, seq_len): + super(BiGRU, self).__init__() + self.dict_dim = dict_dim + self.emb_dim = 128 + self.hid_dim = 128 + self.fc_hid_dim = 96 + self.class_dim = 2 + self.batch_size = batch_size + self.seq_len = seq_len + self.embedding = Embedding( + size=[self.dict_dim + 1, self.emb_dim], + dtype='float32', + param_attr=fluid.ParamAttr(learning_rate=30), + is_sparse=False) + h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32") + h_0 = to_variable(h_0) + self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3) + self._fc2 = Linear( + input_dim=self.hid_dim * 2, output_dim=self.fc_hid_dim, act="tanh") + self._fc_prediction = Linear( + input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax") + self._gru_forward = DynamicGRU( + size=self.hid_dim, h_0=h_0, is_reverse=False) + self._gru_backward = DynamicGRU( + size=self.hid_dim, h_0=h_0, is_reverse=True) + + @declarative + def forward(self, inputs, label=None): + emb = self.embedding(inputs) + o_np_mask = (fluid.layers.reshape(inputs, [-1, 1]) != self.dict_dim + ).astype('float32') + mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim]) + emb = emb * mask_emb + emb = fluid.layers.reshape( + emb, shape=[self.batch_size, -1, self.hid_dim]) + fc_1 = self._fc1(emb) + gru_forward = self._gru_forward(fc_1) + gru_backward = self._gru_backward(fc_1) + gru_forward_tanh = fluid.layers.tanh(gru_forward) + gru_backward_tanh = fluid.layers.tanh(gru_backward) + encoded_vector = fluid.layers.concat( + input=[gru_forward_tanh, gru_backward_tanh], axis=2) + encoded_vector = fluid.layers.reduce_max(encoded_vector, dim=1) + fc_2 = self._fc2(encoded_vector) + prediction = self._fc_prediction(fc_2) + # TODO(Aurelius84): Uncomment the following codes when we support return variable-length vars. 
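+        # The `if label is not None:` branch below stays commented out
+        # because dygraph-to-static does not yet support returning a
+        # different number of variables from each branch.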
+        # if label is not None:
+        cost = fluid.layers.cross_entropy(input=prediction, label=label)
+        avg_cost = fluid.layers.mean(x=cost)
+        acc = fluid.layers.accuracy(input=prediction, label=label)
+        return avg_cost, prediction, acc
+        # else:
+        #     return prediction
+
+
+def fake_data_reader(class_num, vocab_size, batch_size, padding_size):
+    def reader():
+        batch_data = []
+        while True:
+            label = np.random.randint(0, class_num)
+            seq_len = np.random.randint(padding_size // 2,
+                                        int(padding_size * 1.2))
+            word_ids = np.random.randint(0, vocab_size, [seq_len]).tolist()
+            word_ids = word_ids[:padding_size] + [vocab_size] * (padding_size -
+                                                                 seq_len)
+            batch_data.append((word_ids, [label], seq_len))
+            if len(batch_data) == batch_size:
+                yield batch_data
+                batch_data = []
+
+    return reader
+
+
+class Args(object):
+    epoch = 1
+    batch_size = 4
+    class_num = 2
+    lr = 0.01
+    vocab_size = 1000
+    padding_size = 50
+    log_step = 2
+    train_step = 10
+
+
+def train(args, to_static):
+    program_translator.enable(to_static)
+    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
+        else fluid.CPUPlace()
+
+    with fluid.dygraph.guard(place):
+        np.random.seed(SEED)
+        fluid.default_startup_program().random_seed = SEED
+        fluid.default_main_program().random_seed = SEED
+
+        train_reader = fake_data_reader(args.class_num, args.vocab_size,
+                                        args.batch_size, args.padding_size)
+        train_loader = fluid.io.DataLoader.from_generator(capacity=24)
+        train_loader.set_sample_list_generator(train_reader)
+
+        if args.model_type == 'cnn_net':
+            model = CNN(args.vocab_size, args.batch_size, args.padding_size)
+        elif args.model_type == 'bow_net':
+            model = BOW(args.vocab_size, args.batch_size, args.padding_size)
+        elif args.model_type == 'gru_net':
+            model = GRU(args.vocab_size, args.batch_size, args.padding_size)
+        elif args.model_type == 'bigru_net':
+            model = BiGRU(args.vocab_size, args.batch_size, args.padding_size)
+        optimizer = fluid.optimizer.Adagrad(
+            learning_rate=args.lr, parameter_list=model.parameters())
+
+        loss_data = []
+        for eop in range(args.epoch):
+            time_begin = time.time()
+            for batch_id, data in enumerate(train_loader()):
+                word_ids, labels, seq_lens = data
+                doc = to_variable(word_ids.numpy().reshape(-1)).astype('int64')
+                label = labels.astype('int64')
+
+                model.train()
+                avg_cost, prediction, acc = model(doc, label)
+                loss_data.append(avg_cost.numpy()[0])
+
+                avg_cost.backward()
+                optimizer.minimize(avg_cost)
+                model.clear_gradients()
+
+                if batch_id % args.log_step == 0:
+                    time_end = time.time()
+                    used_time = time_end - time_begin
+                    print("step: %d, ave loss: %f, speed: %f steps/s" %
+                          (batch_id, avg_cost.numpy()[0],
+                           args.log_step / used_time))
+                    time_begin = time.time()
+
+                if batch_id == args.train_step:
+                    break
+    return loss_data
+
+
+class TestSentiment(unittest.TestCase):
+    def setUp(self):
+        self.args = Args()
+
+    def train_model(self, model_type='cnn_net'):
+        self.args.model_type = model_type
+        st_out = train(self.args, True)
+        dy_out = train(self.args, False)
+        self.assertTrue(
+            np.allclose(dy_out, st_out),
+            msg="dy_out:\n {}\n st_out:\n {}".format(dy_out, st_out))
+
+    def test_train(self):
+        model_types = ['cnn_net', 'bow_net', 'gru_net', 'bigru_net']
+        for model_type in model_types:
+            print('training %s ...' % model_type)
+            self.train_model(model_type)
+
+
+if __name__ == '__main__':
+    unittest.main()