提交 59132ca3 编写于 作者: R root

add lstm net

上级 e693a685
......@@ -28,7 +28,7 @@ model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints")
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg("epoch", int, 10, "Number of epoches for training.")
train_g.add_arg("epoch", int, 100, "Number of epoches for training.")
train_g.add_arg("save_steps", int, 1000,
"The steps interval to save checkpoints.")
train_g.add_arg("validation_steps", int, 200,
......@@ -139,10 +139,18 @@ def train():
elif args.model_type == 'bow_net':
model = nets.BOW("bow_net", args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'lstm_net':
model = nets.LSTM("lstm_net", args.vocab_size, args.batch_size,
args.padding_size)
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
steps = 0
total_cost, total_acc, total_num_seqs = [], [], []
last_hidden = None
last_cell = None
init_hidden_data = np.zeros(
(1, args.batch_size, 128 * 4), dtype='float32')
init_cell_data = np.zeros(
(1, args.batch_size, 128 * 4), dtype='float32')
for eop in range(args.epoch):
time_begin = time.time()
for batch_id, data in enumerate(train_data_generator()):
......@@ -166,6 +174,15 @@ def train():
args.batch_size, 1))
model.train()
if args.model_type == 'lstm_net':
init_hidden = to_variable(init_hidden_data)
init_cell = to_variable(init_cell_data)
avg_cost, prediction, acc, last_hidden, last_cell = model(
doc, init_hidden, init_cell, label)
init_hidden_data = last_hidden.numpy()
init_cell_data = last_cell.numpy()
else:
avg_cost, prediction, acc = model(doc, label)
avg_cost.backward()
np_mask = (doc.numpy() != args.vocab_size).astype('int32')
......@@ -206,6 +223,16 @@ def train():
np.array([x[1] for x in eval_data]).astype(
'int64').reshape(args.batch_size, 1))
eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
if args.model_type == 'lstm_net':
    init_hidden = to_variable(init_hidden_data)
    init_cell = to_variable(init_cell_data)
    (eval_avg_cost, eval_prediction, eval_acc, last_hidden,
     last_cell) = model(eval_doc, init_hidden, init_cell, eval_label)
    # Keep the carried LSTM state as plain numpy arrays, exactly as the
    # training branch does: every consumer wraps `init_*_data` with
    # `to_variable(...)` again, so storing a Variable here would make the
    # type of the carried state depend on which branch ran last.
    init_hidden_data = last_hidden.numpy()
    init_cell_data = last_cell.numpy()
else:
    eval_avg_cost, eval_prediction, eval_acc = model(
        eval_doc, eval_label)
......@@ -266,6 +293,9 @@ def infer():
elif args.model_type == 'bow_net':
model_infer = nets.BOW("bow_net", args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'lstm_net':
model_infer = nets.LSTM("lstm_net", args.vocab_size,
args.batch_size, args.padding_size)
print('Do inferring ...... ')
total_acc, total_num_seqs = [], []
......
......@@ -17,6 +17,110 @@ from paddle.fluid.dygraph.base import to_variable
import numpy as np
class SimpleLSTMRNN(fluid.dygraph.Layer):
    """Stacked LSTM unrolled step by step with hand-written gate arithmetic.

    Each layer owns one fused weight of shape
    ``[hidden_size * 2, hidden_size * 4]`` and one bias of shape
    ``[hidden_size * 4]``. The layer input is concatenated with the previous
    hidden state, so the input width must equal ``hidden_size``.

    Args:
        name_scope: Name scope forwarded to the base ``Layer``.
        hidden_size: Width of the input, hidden state and cell state.
        num_steps: Number of time steps to unroll in ``forward``.
        num_layers: Number of stacked LSTM layers.
        init_scale: Bound of the uniform initializer used for the parameters.
        dropout: Optional dropout probability applied to each layer's output;
            ``None`` or ``0`` disables it.
    """

    def __init__(self,
                 name_scope,
                 hidden_size,
                 num_steps,
                 num_layers=2,
                 init_scale=0.1,
                 dropout=None):
        super(SimpleLSTMRNN, self).__init__(name_scope)
        self._hidden_size = hidden_size
        self._num_layers = num_layers
        self._init_scale = init_scale
        self._dropout = dropout
        self._input = None
        self._num_steps = num_steps
        # Per-layer running state; rebuilt at the start of every forward().
        self.cell_array = []
        self.hidden_array = []
        # Per-layer parameters, indexed by layer.
        self.weight_1_arr = []
        self.weight_2_arr = []
        self.bias_arr = []
        self.mask_array = []

        for layer_idx in range(self._num_layers):
            # Fused weight for all four gates, applied to [input, hidden].
            weight_1 = self.create_parameter(
                attr=fluid.ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-self._init_scale, high=self._init_scale)),
                shape=[self._hidden_size * 2, self._hidden_size * 4],
                dtype="float32",
                default_initializer=fluid.initializer.UniformInitializer(
                    low=-self._init_scale, high=self._init_scale))
            self.weight_1_arr.append(
                self.add_parameter('w_%d' % layer_idx, weight_1))

            bias_1 = self.create_parameter(
                attr=fluid.ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-self._init_scale, high=self._init_scale)),
                shape=[self._hidden_size * 4],
                dtype="float32",
                default_initializer=fluid.initializer.Constant(0.0))
            self.bias_arr.append(
                self.add_parameter('b_%d' % layer_idx, bias_1))

    def forward(self, input_embedding, init_hidden=None, init_cell=None):
        """Run the stacked LSTM over ``num_steps`` time steps.

        Args:
            input_embedding: Tensor sliced along axis 1 per time step and
                reshaped to ``[-1, hidden_size]``, i.e. laid out as
                ``[batch, steps, hidden_size]`` with ``steps >= num_steps``.
            init_hidden: Initial hidden states, sliced along axis 0 per layer
                (``[num_layers, batch, hidden_size]``). Required.
            init_cell: Initial cell states, same layout as ``init_hidden``.
                Required.

        Returns:
            A tuple ``(real_res, last_hidden, last_cell)`` where ``real_res``
            holds the top layer's output for every step as
            ``[batch, num_steps, hidden_size]`` and the two states are laid
            out as ``[num_layers, batch, hidden_size]``.

        Raises:
            ValueError: If ``init_hidden`` or ``init_cell`` is ``None``.
        """
        # The signature advertises None defaults, but the states are sliced
        # unconditionally below; fail early with a readable message.
        if init_hidden is None or init_cell is None:
            raise ValueError(
                "SimpleLSTMRNN.forward requires both init_hidden and "
                "init_cell")

        self.cell_array = []
        self.hidden_array = []
        for layer_idx in range(self._num_layers):
            layer_hidden = fluid.layers.slice(
                init_hidden,
                axes=[0],
                starts=[layer_idx],
                ends=[layer_idx + 1])
            layer_cell = fluid.layers.slice(
                init_cell, axes=[0], starts=[layer_idx], ends=[layer_idx + 1])
            self.hidden_array.append(
                fluid.layers.reshape(
                    layer_hidden, shape=[-1, self._hidden_size]))
            self.cell_array.append(
                fluid.layers.reshape(
                    layer_cell, shape=[-1, self._hidden_size]))

        step_outputs = []
        for step in range(self._num_steps):
            self._input = fluid.layers.slice(
                input_embedding, axes=[1], starts=[step], ends=[step + 1])
            self._input = fluid.layers.reshape(
                self._input, shape=[-1, self._hidden_size])

            for layer_idx in range(self._num_layers):
                prev_hidden = self.hidden_array[layer_idx]
                prev_cell = self.cell_array[layer_idx]

                # One matmul produces the pre-activations of all four gates.
                stacked = fluid.layers.concat([self._input, prev_hidden], 1)
                gate_input = fluid.layers.matmul(
                    x=stacked, y=self.weight_1_arr[layer_idx])
                gate_input = fluid.layers.elementwise_add(
                    gate_input, self.bias_arr[layer_idx])
                in_gate, candidate, forget_gate, out_gate = (
                    fluid.layers.split(
                        gate_input, num_or_sections=4, dim=-1))

                new_cell = (
                    prev_cell * fluid.layers.sigmoid(forget_gate) +
                    fluid.layers.sigmoid(in_gate) *
                    fluid.layers.tanh(candidate))
                new_hidden = (fluid.layers.tanh(new_cell) *
                              fluid.layers.sigmoid(out_gate))

                self.hidden_array[layer_idx] = new_hidden
                self.cell_array[layer_idx] = new_cell

                # The (optionally dropped-out) hidden state feeds the next
                # layer; the stored recurrent state is left untouched.
                self._input = new_hidden
                if self._dropout is not None and self._dropout > 0.0:
                    self._input = fluid.layers.dropout(
                        self._input,
                        dropout_prob=self._dropout,
                        dropout_implementation='upscale_in_train')

            step_outputs.append(
                fluid.layers.reshape(
                    self._input, shape=[1, -1, self._hidden_size]))

        # [steps, batch, hidden] -> [batch, steps, hidden]
        real_res = fluid.layers.concat(step_outputs, 0)
        real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2])

        # [batch, layers * hidden] -> [layers, batch, hidden]
        last_hidden = fluid.layers.concat(self.hidden_array, 1)
        last_hidden = fluid.layers.reshape(
            last_hidden, shape=[-1, self._num_layers, self._hidden_size])
        last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])

        last_cell = fluid.layers.concat(self.cell_array, 1)
        last_cell = fluid.layers.reshape(
            last_cell, shape=[-1, self._num_layers, self._hidden_size])
        last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])

        return real_res, last_hidden, last_cell
class SimpleConvPool(fluid.dygraph.Layer):
def __init__(self,
name_scope,
......@@ -132,3 +236,67 @@ class BOW(fluid.dygraph.Layer):
return avg_cost, prediction, acc
else:
return prediction
class LSTM(fluid.dygraph.Layer):
    """Binary text classifier: embedding -> FC -> SimpleLSTMRNN -> FC -> softmax.

    Args:
        name_scope: Name scope forwarded to the base ``Layer``.
        dict_dim: Vocabulary size. The id ``dict_dim`` itself is treated as
            padding, which is why the embedding table has ``dict_dim + 1``
            rows.
        batch_size: Stored on the instance; not read by ``forward``.
        seq_len: Number of tokens per sequence, used to fold the flattened
            token embeddings back into ``[batch, seq_len, emb_dim]``.
    """

    def __init__(self, name_scope, dict_dim, batch_size, seq_len):
        super(LSTM, self).__init__(name_scope)
        self.dict_dim = dict_dim
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2
        # NOTE(review): with a single unrolled step, SimpleLSTMRNN only
        # consumes time step 0 of the FC output - confirm this is intended
        # rather than `seq_len`.
        self.lstm_num_steps = 1
        self.lstm_num_layers = 1
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.embedding = Embedding(
            self.full_name(),
            size=[self.dict_dim + 1, self.emb_dim],
            dtype='float32',
            param_attr=fluid.ParamAttr(learning_rate=30),
            is_sparse=False)
        # Projects every token to the LSTM width (hid_dim * 4).
        self._fc1 = FC(self.full_name(),
                       size=self.hid_dim * 4,
                       num_flatten_dims=2)
        self._fc2 = FC(self.full_name(), size=self.fc_hid_dim, act="tanh")
        self._fc_prediction = FC(self.full_name(),
                                 size=self.class_dim,
                                 act="softmax")
        self.simple_lstm_rnn = SimpleLSTMRNN(
            self.full_name(),
            self.hid_dim * 4,
            num_steps=self.lstm_num_steps,
            num_layers=self.lstm_num_layers,
            init_scale=0.1,
            dropout=None)

    def forward(self, inputs, init_hidden, init_cell, label=None):
        """Classify a batch of token-id sequences.

        Args:
            inputs: Token ids; assumed to be flattened to
                ``[batch * seq_len, 1]`` by the caller - TODO confirm.
            init_hidden: Initial LSTM hidden state; reshaped to
                ``[lstm_num_layers, -1, hid_dim * 4]``.
            init_cell: Initial LSTM cell state; same layout as
                ``init_hidden``.
            label: Optional class labels. When given, loss and accuracy are
                computed as well.

        Returns:
            ``(avg_cost, prediction, acc, last_hidden, last_cell)`` when
            ``label`` is provided, otherwise just ``prediction``. The
            returned ``last_hidden`` is flattened to ``[-1, hid_dim * 4]``.
        """
        emb = self.embedding(inputs)

        # Zero out the embeddings of padding tokens (id == dict_dim).
        pad_mask = (inputs.numpy() != self.dict_dim).astype('float32')
        mask_emb = fluid.layers.expand(
            to_variable(pad_mask), [1, self.emb_dim])
        emb = emb * mask_emb

        emb = fluid.layers.reshape(
            emb, shape=[-1, 1, self.seq_len, self.emb_dim])
        # Axis 1 has size 1, so this max only drops that axis.
        emb = fluid.layers.reduce_max(emb, dim=1)
        fc_1 = self._fc1(emb)

        lstm_width = self.hid_dim * 4
        init_h = fluid.layers.reshape(
            init_hidden, shape=[self.lstm_num_layers, -1, lstm_width])
        init_c = fluid.layers.reshape(
            init_cell, shape=[self.lstm_num_layers, -1, lstm_width])
        real_res, last_hidden, last_cell = self.simple_lstm_rnn(fc_1, init_h,
                                                                init_c)

        last_hidden = fluid.layers.reshape(
            last_hidden, shape=[-1, lstm_width])
        tanh_1 = fluid.layers.tanh(last_hidden)
        fc_2 = self._fc2(tanh_1)
        prediction = self._fc_prediction(fc_2)

        # Compare against None explicitly: `label` is a tensor, and the
        # truth value of a tensor is not a reliable "was it passed" test.
        if label is not None:
            cost = fluid.layers.cross_entropy(input=prediction, label=label)
            avg_cost = fluid.layers.mean(x=cost)
            acc = fluid.layers.accuracy(input=prediction, label=label)
            return avg_cost, prediction, acc, last_hidden, last_cell
        return prediction
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册