提交 9ce0dcb3 编写于 作者: J JesseyXujin 提交者: pkpk

add gru and bigru net on dygraph (#3913)

* add gru and bigru net

* add gru and bigru

* add gru and bigru
上级 27d556df
...@@ -7,19 +7,22 @@ ...@@ -7,19 +7,22 @@
| :------| :------ | | :------| :------ |
| CNN | 90.6% | | CNN | 90.6% |
| BOW | 90.1% | | BOW | 90.1% |
| GRU | 90.0% |
| BIGRU | 89.7% |
动态图文档请见[Dygraph](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/user_guides/howto/dygraph/DyGraph.html) 动态图文档请见[Dygraph](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/user_guides/howto/dygraph/DyGraph.html)
## 快速开始 ## 快速开始
本项目依赖于 Paddlepaddle 1.5.0 及以上版本,请参考 [安装指南](http://www.paddlepaddle.org/#quick-start) 进行安装 本项目依赖于 Paddlepaddle 1.5.0 及以上版本,请参考 [安装指南](http://www.paddlepaddle.org/#quick-start) 进行安装
python版本依赖python 2.7或python 3.5及以上版本 python版本依赖python 2.7或python 3.5及以上版本
#### 安装代码 #### 安装代码
克隆数据集代码库到本地 克隆数据集代码库到本地
```shell ```shell
git clone https://github.com/PaddlePaddle/models.git git clone https://github.com/PaddlePaddle/models.git
cd models/dygraph/sentiment cd models/dygraph/sentiment
...@@ -35,14 +38,15 @@ tar -zxvf sentiment_classification-dataset-1.0.0.tar.gz ...@@ -35,14 +38,15 @@ tar -zxvf sentiment_classification-dataset-1.0.0.tar.gz
#### 模型训练 #### 模型训练
基于示例的数据集,可以运行下面的命令,在训练集(train.tsv)上进行模型训练,并在开发集(dev.tsv)验证 基于示例的数据集,可以运行下面的命令,在训练集(train.tsv)上进行模型训练,并在开发集(dev.tsv)验证。
model_type从bow_net,cnn_net,gru_net,bigru_net中选择。
```shell ```shell
python main.py python main.py --model_type=bow_net
``` ```
#### 模型预测 #### 模型预测
利用已有模型,可以运行下面命令,对未知label的数据(test.tsv)进行预测 利用已有模型,可以运行下面命令,对未知label的数据(test.tsv)进行预测
```shell ```shell
python main.py --do_train false --do_infer true --checkpoints ./path_to_save_models python main.py --do_train false --do_infer true --checkpoints ./path_to_save_models
``` ```
......
...@@ -139,17 +139,20 @@ def train(): ...@@ -139,17 +139,20 @@ def train():
elif args.model_type == 'bow_net': elif args.model_type == 'bow_net':
model = nets.BOW("bow_net", args.vocab_size, args.batch_size, model = nets.BOW("bow_net", args.vocab_size, args.batch_size,
args.padding_size) args.padding_size)
elif args.model_type == 'gru_net':
model = nets.GRU("gru_net", args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'bigru_net':
model = nets.BiGRU("bigru_net", args.vocab_size, args.batch_size,
args.padding_size)
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr) sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
steps = 0 steps = 0
total_cost, total_acc, total_num_seqs = [], [], [] total_cost, total_acc, total_num_seqs = [], [], []
for eop in range(args.epoch): for eop in range(args.epoch):
time_begin = time.time() time_begin = time.time()
for batch_id, data in enumerate(train_data_generator()): for batch_id, data in enumerate(train_data_generator()):
enable_profile = steps > args.profile_steps enable_profile = steps > args.profile_steps
with profile_context(enable_profile): with profile_context(enable_profile):
steps += 1 steps += 1
doc = to_variable( doc = to_variable(
np.array([ np.array([
...@@ -160,11 +163,9 @@ def train(): ...@@ -160,11 +163,9 @@ def train():
constant_values=(args.vocab_size)) constant_values=(args.vocab_size))
for x in data for x in data
]).astype('int64').reshape(-1, 1)) ]).astype('int64').reshape(-1, 1))
label = to_variable( label = to_variable(
np.array([x[1] for x in data]).astype('int64').reshape( np.array([x[1] for x in data]).astype('int64').reshape(
args.batch_size, 1)) args.batch_size, 1))
model.train() model.train()
avg_cost, prediction, acc = model(doc, label) avg_cost, prediction, acc = model(doc, label)
avg_cost.backward() avg_cost.backward()
...@@ -208,7 +209,6 @@ def train(): ...@@ -208,7 +209,6 @@ def train():
eval_doc = to_variable(eval_np_doc.reshape(-1, 1)) eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
eval_avg_cost, eval_prediction, eval_acc = model( eval_avg_cost, eval_prediction, eval_acc = model(
eval_doc, eval_label) eval_doc, eval_label)
eval_np_mask = ( eval_np_mask = (
eval_np_doc != args.vocab_size).astype('int32') eval_np_doc != args.vocab_size).astype('int32')
eval_word_num = np.sum(eval_np_mask) eval_word_num = np.sum(eval_np_mask)
...@@ -266,9 +266,14 @@ def infer(): ...@@ -266,9 +266,14 @@ def infer():
elif args.model_type == 'bow_net': elif args.model_type == 'bow_net':
model_infer = nets.BOW("bow_net", args.vocab_size, args.batch_size, model_infer = nets.BOW("bow_net", args.vocab_size, args.batch_size,
args.padding_size) args.padding_size)
elif args.model_type == 'gru_net':
model_infer = nets.GRU("gru_net", args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'bigru_net':
model_infer = nets.BiGRU("bigru_net", args.vocab_size,
args.batch_size, args.padding_size)
print('Do inferring ...... ') print('Do inferring ...... ')
total_acc, total_num_seqs = [], [] total_acc, total_num_seqs = [], []
restore, _ = fluid.load_dygraph(args.checkpoints) restore, _ = fluid.load_dygraph(args.checkpoints)
cnn_net_infer.set_dict(restore) cnn_net_infer.set_dict(restore)
cnn_net_infer.eval() cnn_net_infer.eval()
...@@ -287,9 +292,7 @@ def infer(): ...@@ -287,9 +292,7 @@ def infer():
label = to_variable( label = to_variable(
np.array([x[1] for x in data]).astype('int64').reshape( np.array([x[1] for x in data]).astype('int64').reshape(
args.batch_size, 1)) args.batch_size, 1))
_, _, acc = model_infer(doc, label) _, _, acc = model_infer(doc, label)
mask = (np_doc != args.vocab_size).astype('int32') mask = (np_doc != args.vocab_size).astype('int32')
word_num = np.sum(mask) word_num = np.sum(mask)
total_acc.append(acc.numpy() * word_num) total_acc.append(acc.numpy() * word_num)
......
...@@ -13,10 +13,55 @@ ...@@ -13,10 +13,55 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, Embedding from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, Embedding
from paddle.fluid.dygraph import GRUUnit
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
import numpy as np import numpy as np
class DynamicGRU(fluid.dygraph.Layer):
    """Unrolled GRU over the time axis of a (batch, seq, size*3) input.

    Applies a single GRUUnit step-by-step along the sequence dimension and
    returns the per-step hidden states stacked back along that axis, in
    time order regardless of direction.
    """

    def __init__(self,
                 scope_name,
                 size,
                 param_attr=None,
                 bias_attr=None,
                 is_reverse=False,
                 gate_activation='sigmoid',
                 candidate_activation='tanh',
                 h_0=None,
                 origin_mode=False,
                 init_size=None):
        super(DynamicGRU, self).__init__(scope_name)
        # One GRU cell, re-applied at every time step in forward().
        # GRUUnit expects its input pre-projected to 3 * size (gate width).
        self.gru_unit = GRUUnit(
            self.full_name(),
            size * 3,
            param_attr=param_attr,
            bias_attr=bias_attr,
            activation=candidate_activation,
            gate_activation=gate_activation,
            origin_mode=origin_mode)
        self.size = size
        self.h_0 = h_0  # initial hidden state (may be None)
        self.is_reverse = is_reverse

    def forward(self, inputs):
        """Run the GRU over `inputs` (batch, seq, 3*size) -> (batch, seq, size)."""
        state = self.h_0
        outputs = []
        seq_len = inputs.shape[1]
        # Walk the sequence back-to-front for a reversed GRU, front-to-back
        # otherwise; the hidden state threads through consecutive steps.
        time_steps = reversed(range(seq_len)) if self.is_reverse else range(seq_len)
        for t in time_steps:
            step_in = fluid.layers.reshape(
                inputs[:, t:t + 1, :], [-1, inputs.shape[2]], inplace=False)
            # GRUUnit also returns reset/gate tensors; only the hidden is kept.
            state, _, _ = self.gru_unit(step_in, state)
            outputs.append(
                fluid.layers.reshape(
                    state, [-1, 1, state.shape[1]], inplace=False))
        if self.is_reverse:
            # Steps were collected last-first; restore chronological order.
            outputs = outputs[::-1]
        return fluid.layers.concat(outputs, axis=1)
class SimpleConvPool(fluid.dygraph.Layer): class SimpleConvPool(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope, name_scope,
...@@ -57,7 +102,6 @@ class CNN(fluid.dygraph.Layer): ...@@ -57,7 +102,6 @@ class CNN(fluid.dygraph.Layer):
size=[self.dict_dim + 1, self.emb_dim], size=[self.dict_dim + 1, self.emb_dim],
dtype='float32', dtype='float32',
is_sparse=False) is_sparse=False)
self._simple_conv_pool_1 = SimpleConvPool( self._simple_conv_pool_1 = SimpleConvPool(
self.full_name(), self.full_name(),
self.hid_dim, self.hid_dim,
...@@ -79,12 +123,10 @@ class CNN(fluid.dygraph.Layer): ...@@ -79,12 +123,10 @@ class CNN(fluid.dygraph.Layer):
conv_3 = self._simple_conv_pool_1(emb) conv_3 = self._simple_conv_pool_1(emb)
fc_1 = self._fc1(conv_3) fc_1 = self._fc1(conv_3)
prediction = self._fc_prediction(fc_1) prediction = self._fc_prediction(fc_1)
if label: if label:
cost = fluid.layers.cross_entropy(input=prediction, label=label) cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label) acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, prediction, acc return avg_cost, prediction, acc
else: else:
return prediction return prediction
...@@ -128,7 +170,113 @@ class BOW(fluid.dygraph.Layer): ...@@ -128,7 +170,113 @@ class BOW(fluid.dygraph.Layer):
cost = fluid.layers.cross_entropy(input=prediction, label=label) cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label) acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, prediction, acc
else:
return prediction
class GRU(fluid.dygraph.Layer):
    """Single-direction GRU sentiment classifier.

    Pipeline: embedding -> pad-token masking -> FC (projects to the
    3*hid_dim gate width expected by GRUUnit) -> DynamicGRU -> tanh ->
    FC -> softmax over 2 classes.
    """

    def __init__(self, name_scope, dict_dim, batch_size, seq_len):
        super(GRU, self).__init__(name_scope)
        self.dict_dim = dict_dim    # vocab size; index dict_dim is the pad id
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2          # binary sentiment
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.embedding = Embedding(
            self.full_name(),
            size=[self.dict_dim + 1, self.emb_dim],  # +1 row for the pad id
            dtype='float32',
            param_attr=fluid.ParamAttr(learning_rate=30),
            is_sparse=False)
        # Zero initial hidden state, fixed to the training batch size.
        h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
        h_0 = to_variable(h_0)
        self._fc1 = FC(self.full_name(),
                       size=self.hid_dim * 3,
                       num_flatten_dims=2)
        self._fc2 = FC(self.full_name(), size=self.fc_hid_dim, act="tanh")
        self._fc_prediction = FC(self.full_name(),
                                 size=self.class_dim,
                                 act="softmax")
        self._gru = DynamicGRU(self.full_name(), size=self.hid_dim, h_0=h_0)

    def forward(self, inputs, label=None):
        """Return (avg_cost, prediction, acc) when `label` is given, else prediction."""
        emb = self.embedding(inputs)
        # Zero out embeddings of pad tokens (id == dict_dim). The mask is
        # built on the host from inputs.numpy(), then broadcast over emb_dim.
        o_np_mask = to_variable(
            inputs.numpy() != self.dict_dim).astype('float32')
        # o_np_mask is already a Variable; the original re-wrapped it in
        # to_variable() redundantly.
        mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim])
        emb = emb * mask_emb
        emb = fluid.layers.reshape(
            emb, shape=[self.batch_size, -1, self.hid_dim])
        fc_1 = self._fc1(emb)
        gru_hidden = self._gru(fc_1)
        tanh_1 = fluid.layers.tanh(gru_hidden)
        fc_2 = self._fc2(tanh_1)
        prediction = self._fc_prediction(fc_2)
        # Fix: test for presence explicitly. `if label:` asked for the
        # truthiness of a framework Variable, not "was a label supplied".
        if label is not None:
            cost = fluid.layers.cross_entropy(input=prediction, label=label)
            avg_cost = fluid.layers.mean(x=cost)
            acc = fluid.layers.accuracy(input=prediction, label=label)
            return avg_cost, prediction, acc
        else:
            return prediction
class BiGRU(fluid.dygraph.Layer):
    """Bidirectional GRU sentiment classifier.

    Runs a forward and a reversed DynamicGRU over the same projected
    embeddings, concatenates both hidden-state sequences along the feature
    axis, then classifies with two fully-connected layers (softmax over
    2 classes).
    """

    def __init__(self, name_scope, dict_dim, batch_size, seq_len):
        super(BiGRU, self).__init__(name_scope)
        self.dict_dim = dict_dim    # vocab size; index dict_dim is the pad id
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2          # binary sentiment
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.embedding = Embedding(
            self.full_name(),
            size=[self.dict_dim + 1, self.emb_dim],  # +1 row for the pad id
            dtype='float32',
            param_attr=fluid.ParamAttr(learning_rate=30),
            is_sparse=False)
        # Shared zero initial state for both directions.
        h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
        h_0 = to_variable(h_0)
        self._fc1 = FC(self.full_name(),
                       size=self.hid_dim * 3,
                       num_flatten_dims=2)
        self._fc2 = FC(self.full_name(), size=self.fc_hid_dim, act="tanh")
        self._fc_prediction = FC(self.full_name(),
                                 size=self.class_dim,
                                 act="softmax")
        self._gru_forward = DynamicGRU(
            self.full_name(), size=self.hid_dim, h_0=h_0, is_reverse=False)
        self._gru_backward = DynamicGRU(
            self.full_name(), size=self.hid_dim, h_0=h_0, is_reverse=True)

    def forward(self, inputs, label=None):
        """Return (avg_cost, prediction, acc) when `label` is given, else prediction."""
        emb = self.embedding(inputs)
        # Zero out embeddings of pad tokens (id == dict_dim). The mask is
        # built on the host from inputs.numpy(), then broadcast over emb_dim.
        o_np_mask = to_variable(
            inputs.numpy() != self.dict_dim).astype('float32')
        # o_np_mask is already a Variable; the original re-wrapped it in
        # to_variable() redundantly.
        mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim])
        emb = emb * mask_emb
        emb = fluid.layers.reshape(
            emb, shape=[self.batch_size, -1, self.hid_dim])
        fc_1 = self._fc1(emb)
        gru_forward = self._gru_forward(fc_1)
        gru_backward = self._gru_backward(fc_1)
        gru_forward_tanh = fluid.layers.tanh(gru_forward)
        gru_backward_tanh = fluid.layers.tanh(gru_backward)
        # Concatenate the two directions along the feature axis.
        encoded_vector = fluid.layers.concat(
            input=[gru_forward_tanh, gru_backward_tanh], axis=2)
        fc_2 = self._fc2(encoded_vector)
        prediction = self._fc_prediction(fc_2)
        # Fix: test for presence explicitly. `if label:` asked for the
        # truthiness of a framework Variable, not "was a label supplied".
        if label is not None:
            cost = fluid.layers.cross_entropy(input=prediction, label=label)
            avg_cost = fluid.layers.mean(x=cost)
            acc = fluid.layers.accuracy(input=prediction, label=label)
            return avg_cost, prediction, acc
        else:
            return prediction
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册