提交 9ce0dcb3 编写于 作者: J JesseyXujin 提交者: pkpk

add gru and bigru net on dygraph (#3913)

* add gru and bigru net

* add gru and bigru

* add gru and bigru
上级 27d556df
......@@ -7,19 +7,22 @@
| :------| :------ |
| CNN | 90.6% |
| BOW | 90.1% |
| GRU | 90.0% |
| BIGRU | 89.7% |
动态图文档请见[Dygraph](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/user_guides/howto/dygraph/DyGraph.html)
## 快速开始
本项目依赖于 Paddlepaddle 1.5.0 及以上版本,请参考 [安装指南](http://www.paddlepaddle.org/#quick-start) 进行安装
本项目依赖于 Paddlepaddle 1.5.0 及以上版本,请参考 [安装指南](http://www.paddlepaddle.org/#quick-start) 进行安装
python版本依赖python 2.7或python 3.5及以上版本
python版本依赖python 2.7或python 3.5及以上版本
#### 安装代码
克隆数据集代码库到本地
克隆数据集代码库到本地
```shell
git clone https://github.com/PaddlePaddle/models.git
cd models/dygraph/sentiment
......@@ -35,14 +38,15 @@ tar -zxvf sentiment_classification-dataset-1.0.0.tar.gz
#### 模型训练
基于示例的数据集,可以运行下面的命令,在训练集(train.tsv)上进行模型训练,并在开发集(dev.tsv)验证
基于示例的数据集,可以运行下面的命令,在训练集(train.tsv)上进行模型训练,并在开发集(dev.tsv)验证。
model_type从bow_net,cnn_net,gru_net,bigru_net中选择。
```shell
python main.py
python main.py --model_type=bow_net
```
#### 模型预测
利用已有模型,可以运行下面命令,对未知label的数据(test.tsv)进行预测
利用已有模型,可以运行下面命令,对未知label的数据(test.tsv)进行预测
```shell
python main.py --do_train false --do_infer true --checkpoints ./path_to_save_models
```
......
......@@ -139,17 +139,20 @@ def train():
elif args.model_type == 'bow_net':
model = nets.BOW("bow_net", args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'gru_net':
model = nets.GRU("gru_net", args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'bigru_net':
model = nets.BiGRU("bigru_net", args.vocab_size, args.batch_size,
args.padding_size)
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
steps = 0
total_cost, total_acc, total_num_seqs = [], [], []
for eop in range(args.epoch):
time_begin = time.time()
for batch_id, data in enumerate(train_data_generator()):
enable_profile = steps > args.profile_steps
with profile_context(enable_profile):
steps += 1
doc = to_variable(
np.array([
......@@ -160,11 +163,9 @@ def train():
constant_values=(args.vocab_size))
for x in data
]).astype('int64').reshape(-1, 1))
label = to_variable(
np.array([x[1] for x in data]).astype('int64').reshape(
args.batch_size, 1))
model.train()
avg_cost, prediction, acc = model(doc, label)
avg_cost.backward()
......@@ -208,7 +209,6 @@ def train():
eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
eval_avg_cost, eval_prediction, eval_acc = model(
eval_doc, eval_label)
eval_np_mask = (
eval_np_doc != args.vocab_size).astype('int32')
eval_word_num = np.sum(eval_np_mask)
......@@ -266,9 +266,14 @@ def infer():
elif args.model_type == 'bow_net':
model_infer = nets.BOW("bow_net", args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'gru_net':
model_infer = nets.GRU("gru_net", args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'bigru_net':
model_infer = nets.BiGRU("bigru_net", args.vocab_size,
args.batch_size, args.padding_size)
print('Do inferring ...... ')
total_acc, total_num_seqs = [], []
restore, _ = fluid.load_dygraph(args.checkpoints)
cnn_net_infer.set_dict(restore)
cnn_net_infer.eval()
......@@ -287,9 +292,7 @@ def infer():
label = to_variable(
np.array([x[1] for x in data]).astype('int64').reshape(
args.batch_size, 1))
_, _, acc = model_infer(doc, label)
mask = (np_doc != args.vocab_size).astype('int32')
word_num = np.sum(mask)
total_acc.append(acc.numpy() * word_num)
......
......@@ -13,10 +13,55 @@
# limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, Embedding
from paddle.fluid.dygraph import GRUUnit
from paddle.fluid.dygraph.base import to_variable
import numpy as np
class DynamicGRU(fluid.dygraph.Layer):
    """Unrolled single-direction GRU over a (batch, seq_len, feature) input.

    Steps a ``GRUUnit`` through the time axis one step at a time and stacks
    the per-step hidden states back into a (batch, seq_len, size) tensor.
    When ``is_reverse`` is True the sequence is consumed back-to-front but
    the output is still returned in chronological order.
    """

    def __init__(self,
                 scope_name,
                 size,
                 param_attr=None,
                 bias_attr=None,
                 is_reverse=False,
                 gate_activation='sigmoid',
                 candidate_activation='tanh',
                 h_0=None,
                 origin_mode=False,
                 init_size=None):
        # NOTE: init_size is accepted for interface compatibility but unused.
        super(DynamicGRU, self).__init__(scope_name)
        # GRUUnit expects its size argument to be 3 * hidden_size
        # (update gate, reset gate, candidate state).
        self.gru_unit = GRUUnit(
            self.full_name(),
            size * 3,
            param_attr=param_attr,
            bias_attr=bias_attr,
            activation=candidate_activation,
            gate_activation=gate_activation,
            origin_mode=origin_mode)
        self.size = size
        self.h_0 = h_0
        self.is_reverse = is_reverse

    def forward(self, inputs):
        """Run the GRU over ``inputs`` of shape (batch, seq_len, feature).

        Returns a tensor of shape (batch, seq_len, size) holding the hidden
        state at every time step, in chronological order.
        """
        seq_len = inputs.shape[1]
        time_steps = (range(seq_len - 1, -1, -1)
                      if self.is_reverse else range(seq_len))
        state = self.h_0
        outputs = []
        for t in time_steps:
            step_input = inputs[:, t:t + 1, :]
            # Drop the singleton time axis: (batch, 1, feat) -> (batch, feat).
            step_input = fluid.layers.reshape(
                step_input, [-1, step_input.shape[2]], inplace=False)
            # gru_unit also returns reset/gate tensors, which are not needed.
            state, _, _ = self.gru_unit(step_input, state)
            # Re-insert the time axis so the steps can be concatenated.
            outputs.append(
                fluid.layers.reshape(
                    state, [-1, 1, state.shape[1]], inplace=False))
        if self.is_reverse:
            # Steps were produced back-to-front; restore chronological order.
            outputs = outputs[::-1]
        return fluid.layers.concat(outputs, axis=1)
class SimpleConvPool(fluid.dygraph.Layer):
def __init__(self,
name_scope,
......@@ -57,7 +102,6 @@ class CNN(fluid.dygraph.Layer):
size=[self.dict_dim + 1, self.emb_dim],
dtype='float32',
is_sparse=False)
self._simple_conv_pool_1 = SimpleConvPool(
self.full_name(),
self.hid_dim,
......@@ -79,12 +123,10 @@ class CNN(fluid.dygraph.Layer):
conv_3 = self._simple_conv_pool_1(emb)
fc_1 = self._fc1(conv_3)
prediction = self._fc_prediction(fc_1)
if label:
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, prediction, acc
else:
return prediction
......@@ -128,7 +170,113 @@ class BOW(fluid.dygraph.Layer):
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, prediction, acc
else:
return prediction
class GRU(fluid.dygraph.Layer):
    """Single-direction GRU sentiment classifier.

    Pipeline: embedding -> padding mask -> FC (to 3*hid_dim gate inputs)
    -> DynamicGRU -> tanh -> FC -> softmax over ``class_dim`` classes.

    Args:
        name_scope: dygraph layer name scope.
        dict_dim: vocabulary size; ``dict_dim`` itself is the padding id.
        batch_size: fixed batch size the input is reshaped against.
        seq_len: padded sequence length (stored; shape is inferred with -1).
    """

    def __init__(self, name_scope, dict_dim, batch_size, seq_len):
        super(GRU, self).__init__(name_scope)
        self.dict_dim = dict_dim
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.embedding = Embedding(
            self.full_name(),
            size=[self.dict_dim + 1, self.emb_dim],
            dtype='float32',
            param_attr=fluid.ParamAttr(learning_rate=30),
            is_sparse=False)
        # Initial hidden state: zeros of shape (batch, hid_dim).
        h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
        h_0 = to_variable(h_0)
        # Projects embeddings to the 3*hid_dim gate pre-activations
        # that DynamicGRU's GRUUnit expects.
        self._fc1 = FC(self.full_name(),
                       size=self.hid_dim * 3,
                       num_flatten_dims=2)
        self._fc2 = FC(self.full_name(), size=self.fc_hid_dim, act="tanh")
        self._fc_prediction = FC(self.full_name(),
                                 size=self.class_dim,
                                 act="softmax")
        self._gru = DynamicGRU(self.full_name(), size=self.hid_dim, h_0=h_0)

    def forward(self, inputs, label=None):
        """Return (avg_cost, prediction, acc) when ``label`` is given,
        otherwise just the softmax prediction."""
        emb = self.embedding(inputs)
        # Zero out embeddings of padding tokens (id == dict_dim).
        o_np_mask = to_variable(
            inputs.numpy() != self.dict_dim).astype('float32')
        mask_emb = fluid.layers.expand(
            to_variable(o_np_mask), [1, self.hid_dim])
        emb = emb * mask_emb
        emb = fluid.layers.reshape(
            emb, shape=[self.batch_size, -1, self.hid_dim])
        fc_1 = self._fc1(emb)
        gru_hidden = self._gru(fc_1)
        tanh_1 = fluid.layers.tanh(gru_hidden)
        fc_2 = self._fc2(tanh_1)
        prediction = self._fc_prediction(fc_2)
        # BUGFIX: test for presence explicitly. ``if label:`` evaluated the
        # truthiness of a fluid Variable, which is not a reliable way to
        # distinguish "no label passed" from a label tensor.
        if label is not None:
            cost = fluid.layers.cross_entropy(input=prediction, label=label)
            avg_cost = fluid.layers.mean(x=cost)
            acc = fluid.layers.accuracy(input=prediction, label=label)
            return avg_cost, prediction, acc
        else:
            return prediction
class BiGRU(fluid.dygraph.Layer):
    """Bidirectional GRU sentiment classifier.

    Runs a forward and a reversed DynamicGRU over the same projected
    embeddings, concatenates both (tanh-activated) state sequences along
    the feature axis, then classifies with FC -> softmax.

    Args:
        name_scope: dygraph layer name scope.
        dict_dim: vocabulary size; ``dict_dim`` itself is the padding id.
        batch_size: fixed batch size the input is reshaped against.
        seq_len: padded sequence length (stored; shape is inferred with -1).
    """

    def __init__(self, name_scope, dict_dim, batch_size, seq_len):
        super(BiGRU, self).__init__(name_scope)
        self.dict_dim = dict_dim
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.embedding = Embedding(
            self.full_name(),
            size=[self.dict_dim + 1, self.emb_dim],
            dtype='float32',
            param_attr=fluid.ParamAttr(learning_rate=30),
            is_sparse=False)
        # Shared zero initial hidden state for both directions.
        h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
        h_0 = to_variable(h_0)
        # Projects embeddings to the 3*hid_dim gate pre-activations
        # that DynamicGRU's GRUUnit expects.
        self._fc1 = FC(self.full_name(),
                       size=self.hid_dim * 3,
                       num_flatten_dims=2)
        self._fc2 = FC(self.full_name(), size=self.fc_hid_dim, act="tanh")
        self._fc_prediction = FC(self.full_name(),
                                 size=self.class_dim,
                                 act="softmax")
        self._gru_forward = DynamicGRU(
            self.full_name(), size=self.hid_dim, h_0=h_0, is_reverse=False)
        self._gru_backward = DynamicGRU(
            self.full_name(), size=self.hid_dim, h_0=h_0, is_reverse=True)

    def forward(self, inputs, label=None):
        """Return (avg_cost, prediction, acc) when ``label`` is given,
        otherwise just the softmax prediction."""
        emb = self.embedding(inputs)
        # Zero out embeddings of padding tokens (id == dict_dim).
        o_np_mask = to_variable(
            inputs.numpy() != self.dict_dim).astype('float32')
        mask_emb = fluid.layers.expand(
            to_variable(o_np_mask), [1, self.hid_dim])
        emb = emb * mask_emb
        emb = fluid.layers.reshape(
            emb, shape=[self.batch_size, -1, self.hid_dim])
        fc_1 = self._fc1(emb)
        gru_forward = self._gru_forward(fc_1)
        gru_backward = self._gru_backward(fc_1)
        gru_forward_tanh = fluid.layers.tanh(gru_forward)
        gru_backward_tanh = fluid.layers.tanh(gru_backward)
        # Concatenate both directions along the feature axis.
        encoded_vector = fluid.layers.concat(
            input=[gru_forward_tanh, gru_backward_tanh], axis=2)
        fc_2 = self._fc2(encoded_vector)
        prediction = self._fc_prediction(fc_2)
        # BUGFIX: test for presence explicitly. ``if label:`` evaluated the
        # truthiness of a fluid Variable, which is not a reliable way to
        # distinguish "no label passed" from a label tensor.
        if label is not None:
            cost = fluid.layers.cross_entropy(input=prediction, label=label)
            avg_cost = fluid.layers.mean(x=cost)
            acc = fluid.layers.accuracy(input=prediction, label=label)
            return avg_cost, prediction, acc
        else:
            return prediction
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册