diff --git a/dygraph/sentiment/main.py b/dygraph/sentiment/main.py
index ee583c2270edbf1e538fd5435cbe120b56083dc5..21c6251474b79459be3b56d30d209fbbc3df4eae 100644
--- a/dygraph/sentiment/main.py
+++ b/dygraph/sentiment/main.py
@@ -28,7 +28,7 @@
 model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
 model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints")
 train_g = ArgumentGroup(parser, "training", "training options.")
-train_g.add_arg("epoch", int, 10, "Number of epoches for training.")
+train_g.add_arg("epoch", int, 100, "Number of epochs for training.")
 train_g.add_arg("save_steps", int, 1000,
                 "The steps interval to save checkpoints.")
 train_g.add_arg("validation_steps", int, 200,
@@ -139,10 +139,18 @@ def train():
         elif args.model_type == 'bow_net':
             model = nets.BOW("bow_net", args.vocab_size, args.batch_size,
                              args.padding_size)
+        elif args.model_type == 'lstm_net':
+            model = nets.LSTM("lstm_net", args.vocab_size, args.batch_size,
+                              args.padding_size)
         sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
         steps = 0
         total_cost, total_acc, total_num_seqs = [], [], []
-
+        last_hidden = None
+        last_cell = None
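+        # LSTM state is kept as numpy arrays and re-fed on every batch; the
+        # shape is (num_layers, batch_size, hidden_size) -- one layer with a
+        # hidden size of 128 * 4, matching the SimpleLSTMRNN inside nets.LSTM.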
+        init_hidden_data = np.zeros(
+            (1, args.batch_size, 128 * 4), dtype='float32')
+        init_cell_data = np.zeros(
+            (1, args.batch_size, 128 * 4), dtype='float32')
         for eop in range(args.epoch):
             time_begin = time.time()
             for batch_id, data in enumerate(train_data_generator()):
@@ -166,7 +174,16 @@
                         args.batch_size, 1))
 
                 model.train()
-                avg_cost, prediction, acc = model(doc, label)
+
+                if args.model_type == 'lstm_net':
+                    init_hidden = to_variable(init_hidden_data)
+                    init_cell = to_variable(init_cell_data)
+                    avg_cost, prediction, acc, last_hidden, last_cell = model(
+                        doc, init_hidden, init_cell, label)
+                    init_hidden_data = last_hidden.numpy()
+                    init_cell_data = last_cell.numpy()
+                else:
+                    avg_cost, prediction, acc = model(doc, label)
                 avg_cost.backward()
                 np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                 word_num = np.sum(np_mask)
@@ -206,8 +223,18 @@
                             np.array([x[1] for x in eval_data]).astype(
                                 'int64').reshape(args.batch_size, 1))
                         eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
-                        eval_avg_cost, eval_prediction, eval_acc = model(
-                            eval_doc, eval_label)
+                        if args.model_type == 'lstm_net':
+                            init_hidden = to_variable(init_hidden_data)
+                            init_cell = to_variable(init_cell_data)
+                            eval_avg_cost, eval_prediction, eval_acc, last_hidden, last_cell = model(
+                                eval_doc, init_hidden, init_cell,
+                                eval_label)
+                            init_hidden_data = last_hidden.numpy()
+                            init_cell_data = last_cell.numpy()
+                        else:
+                            eval_avg_cost, eval_prediction, eval_acc = model(
+                                eval_doc, eval_label)
 
                         eval_np_mask = (
                             eval_np_doc != args.vocab_size).astype('int32')
@@ -266,6 +293,9 @@ def infer():
         elif args.model_type == 'bow_net':
             model_infer = nets.BOW("bow_net", args.vocab_size,
                                    args.batch_size, args.padding_size)
+        elif args.model_type == 'lstm_net':
+            model_infer = nets.LSTM("lstm_net", args.vocab_size,
+                                    args.batch_size, args.padding_size)
         print('Do inferring ......')
         total_acc, total_num_seqs = [], []
diff --git a/dygraph/sentiment/nets.py b/dygraph/sentiment/nets.py
index edd55ff87431a512ae989a59d1e6e89f60de20fa..6adb5c01798cc034dc1001c71d38f20ba943ea70 100644
--- a/dygraph/sentiment/nets.py
+++ b/dygraph/sentiment/nets.py
@@ -17,6 +17,110 @@ from paddle.fluid.dygraph.base import to_variable
 import numpy as np
 
 
+class SimpleLSTMRNN(fluid.Layer):
+    def __init__(self,
+                 name_scope,
+                 hidden_size,
+                 num_steps,
+                 num_layers=2,
+                 init_scale=0.1,
+                 dropout=None):
+        super(SimpleLSTMRNN, self).__init__(name_scope)
+        self._hidden_size = hidden_size
+        self._num_layers = num_layers
+        self._init_scale = init_scale
+        self._dropout = dropout
+        self._input = None
+        self._num_steps = num_steps
+        self.cell_array = []
+        self.hidden_array = []
+
+        self.weight_1_arr = []
+        self.weight_2_arr = []
+        self.bias_arr = []
+        self.mask_array = []
+
+        for i in range(self._num_layers):
+            weight_1 = self.create_parameter(
+                attr=fluid.ParamAttr(
+                    initializer=fluid.initializer.UniformInitializer(
+                        low=-self._init_scale, high=self._init_scale)),
+                shape=[self._hidden_size * 2, self._hidden_size * 4],
+                dtype="float32",
+                default_initializer=fluid.initializer.UniformInitializer(
+                    low=-self._init_scale, high=self._init_scale))
+            self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1))
+            bias_1 = self.create_parameter(
+                attr=fluid.ParamAttr(
+                    initializer=fluid.initializer.UniformInitializer(
+                        low=-self._init_scale, high=self._init_scale)),
+                shape=[self._hidden_size * 4],
+                dtype="float32",
+                default_initializer=fluid.initializer.Constant(0.0))
+            self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1))
+
+    def forward(self, input_embedding, init_hidden=None, init_cell=None):
+        self.cell_array = []
+        self.hidden_array = []
+
+        for i in range(self._num_layers):
+            pre_hidden = fluid.layers.slice(
+                init_hidden, axes=[0], starts=[i], ends=[i + 1])
+            pre_cell = fluid.layers.slice(
+                init_cell, axes=[0], starts=[i], ends=[i + 1])
+            pre_hidden = fluid.layers.reshape(
+                pre_hidden, shape=[-1, self._hidden_size])
+            pre_cell = fluid.layers.reshape(
+                pre_cell, shape=[-1, self._hidden_size])
+            self.hidden_array.append(pre_hidden)
+            self.cell_array.append(pre_cell)
+
+        res = []
+        for index in range(self._num_steps):
+            self._input = fluid.layers.slice(
+                input_embedding, axes=[1], starts=[index], ends=[index + 1])
+            self._input = fluid.layers.reshape(
+                self._input, shape=[-1, self._hidden_size])
+            for k in range(self._num_layers):
+                pre_hidden = self.hidden_array[k]
+                pre_cell = self.cell_array[k]
+                weight_1 = self.weight_1_arr[k]
+                bias = self.bias_arr[k]
+
+                nn = fluid.layers.concat([self._input, pre_hidden], 1)
+                gate_input = fluid.layers.matmul(x=nn, y=weight_1)
+
+                gate_input = fluid.layers.elementwise_add(gate_input, bias)
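+                # The fused projection yields all four LSTM gates at once;
+                # split into input (i), cell candidate (j), forget (f) and
+                # output (o) gates.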
+                i, j, f, o = fluid.layers.split(
+                    gate_input, num_or_sections=4, dim=-1)
+                c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid(
+                    i) * fluid.layers.tanh(j)
+                m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o)
+                self.hidden_array[k] = m
+                self.cell_array[k] = c
+                self._input = m
+
+                if self._dropout is not None and self._dropout > 0.0:
+                    self._input = fluid.layers.dropout(
+                        self._input,
+                        dropout_prob=self._dropout,
+                        dropout_implementation='upscale_in_train')
+            res.append(
+                fluid.layers.reshape(
+                    self._input, shape=[1, -1, self._hidden_size]))
+        real_res = fluid.layers.concat(res, 0)
+        real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2])
+        last_hidden = fluid.layers.concat(self.hidden_array, 1)
+        last_hidden = fluid.layers.reshape(
+            last_hidden, shape=[-1, self._num_layers, self._hidden_size])
+        last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
+        last_cell = fluid.layers.concat(self.cell_array, 1)
+        last_cell = fluid.layers.reshape(
+            last_cell, shape=[-1, self._num_layers, self._hidden_size])
+        last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])
+        return real_res, last_hidden, last_cell
+
+
 class SimpleConvPool(fluid.dygraph.Layer):
     def __init__(self,
                  name_scope,
@@ -132,3 +236,67 @@ class BOW(fluid.dygraph.Layer):
             return avg_cost, prediction, acc
         else:
             return prediction
+
+
+class LSTM(fluid.dygraph.Layer):
+    def __init__(self, name_scope, dict_dim, batch_size, seq_len):
+        super(LSTM, self).__init__(name_scope)
+        self.dict_dim = dict_dim
+        self.emb_dim = 128
+        self.hid_dim = 128
+        self.fc_hid_dim = 96
+        self.class_dim = 2
+        self.lstm_num_steps = 1
+        self.lstm_num_layers = 1
+        self.batch_size = batch_size
+        self.seq_len = seq_len
+        self.embedding = Embedding(
+            self.full_name(),
+            size=[self.dict_dim + 1, self.emb_dim],
+            dtype='float32',
+            param_attr=fluid.ParamAttr(learning_rate=30),
+            is_sparse=False)
+        self._fc1 = FC(self.full_name(),
+                       size=self.hid_dim * 4,
+                       num_flatten_dims=2)
+        self._fc2 = FC(self.full_name(), size=self.fc_hid_dim, act="tanh")
+        self._fc_prediction = FC(self.full_name(),
+                                 size=self.class_dim,
+                                 act="softmax")
+        self.simple_lstm_rnn = SimpleLSTMRNN(
+            self.full_name(),
+            self.hid_dim * 4,
+            num_steps=self.lstm_num_steps,
+            num_layers=self.lstm_num_layers,
+            init_scale=0.1,
+            dropout=None)
+
+    def forward(self, inputs, init_hidden, init_cell, label=None):
+        emb = self.embedding(inputs)
+        o_np_mask = (inputs.numpy() != self.dict_dim).astype('float32')
+        mask_emb = fluid.layers.expand(
+            to_variable(o_np_mask), [1, self.hid_dim])
+        emb = emb * mask_emb
+        emb = fluid.layers.reshape(
+            emb, shape=[-1, 1, self.seq_len, self.hid_dim])
+        emb = fluid.layers.reduce_max(emb, dim=1)
+        fc_1 = self._fc1(emb)
+        init_h = fluid.layers.reshape(
+            init_hidden, shape=[self.lstm_num_layers, -1, self.hid_dim * 4])
+        init_c = fluid.layers.reshape(
+            init_cell, shape=[self.lstm_num_layers, -1, self.hid_dim * 4])
+        real_res, last_hidden, last_cell = self.simple_lstm_rnn(fc_1, init_h,
+                                                                init_c)
+        last_hidden = fluid.layers.reshape(
+            last_hidden, shape=[-1, self.hid_dim * 4])
+        tanh_1 = fluid.layers.tanh(last_hidden)
+        fc_2 = self._fc2(tanh_1)
+        prediction = self._fc_prediction(fc_2)
+        if label is not None:
+            cost = fluid.layers.cross_entropy(input=prediction, label=label)
+            avg_cost = fluid.layers.mean(x=cost)
+            acc = fluid.layers.accuracy(input=prediction, label=label)
+
+            return avg_cost, prediction, acc, last_hidden, last_cell
+        else:
+            return prediction