From 45f9a3dffeef657db1f3d6bfdc84a518d3a0230e Mon Sep 17 00:00:00 2001
From: JesseyXujin
Date: Thu, 19 Dec 2019 16:18:16 +0800
Subject: [PATCH] modify senta dygraph (#4070)

* modify senta dygraph
* modify details
---
 dygraph/sentiment/README.md | 34 ++++++++++++++++------
 dygraph/sentiment/main.py   | 49 +++++++++++++++----------------
 dygraph/sentiment/nets.py   | 58 ++++++++++++++-----------------------
 3 files changed, 70 insertions(+), 71 deletions(-)

diff --git a/dygraph/sentiment/README.md b/dygraph/sentiment/README.md
index 39efc805..7440fbe6 100755
--- a/dygraph/sentiment/README.md
+++ b/dygraph/sentiment/README.md
@@ -3,13 +3,12 @@
 Sentiment is a high-level intelligent behavior of humans, and identifying the sentiment polarity of text requires deep semantic modeling. In addition, different domains (e.g. dining, sports) express sentiment differently, so model training needs large-scale data covering many domains. We address both problems with deep-learning-based semantic models and large-scale data mining. For evaluation, we benchmark on the open-source sentiment classification dataset ChnSentiCorp. The results are as follows:
 
-| Model | dev |
-| :------| :------ |
-| CNN | 90.6% |
-| BOW | 90.1% |
-| GRU | 90.0% |
-| BIGRU | 89.7% |
-
+| Model | dev | test |
+| :------| :------ | :------ |
+| CNN | 90.6% | 89.7% |
+| BOW | 90.1% | 90.3% |
+| GRU | 90.0% | 91.1% |
+| BIGRU | 89.7% | 89.6% |
 
 For the dynamic graph documentation, see [Dygraph](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/user_guides/howto/dygraph/DyGraph.html)
 
@@ -20,9 +19,11 @@
 Python 2.7, or Python 3.5 and later, is required.
 
+
 #### Installing the code
 
 Clone the repository to your local machine:
+
 ```shell
 git clone https://github.com/PaddlePaddle/models.git
 cd models/dygraph/sentiment
@@ -31,6 +32,7 @@
 #### Data preparation
 
 Download the preprocessed data. After extraction, the senta_data directory contains the training set (train.tsv), the development set (dev.tsv), the test set (test.tsv), and the corresponding vocabulary (word_dict.txt).
+
 ```shell
 wget https://baidu-nlp.bj.bcebos.com/sentiment_classification-dataset-1.0.0.tar.gz
 tar -zxvf sentiment_classification-dataset-1.0.0.tar.gz
@@ -40,27 +42,41 @@
 
 With the sample dataset, you can run the command below to train a model on the training set (train.tsv) and validate it on the development set (dev.tsv). Choose model_type from bow_net, cnn_net, gru_net, and bigru_net.
 
+
 ```shell
-python main.py --model_type=bow_net
+python main.py --model_type=bow_net --do_train=True --do_infer=True --epoch=50 --batch_size=256
 ```
 
 #### Model inference
 
 With a trained model, you can run the command below to predict labels for unlabeled data (test.tsv).
 
+
 ```shell
-python main.py --do_train false --do_infer true --checkpoints ./path_to_save_models
+python main.py --model_type=bow_net --do_train=False --do_infer=True --epoch=1 --checkpoints=./path_to_save_models
 ```
 
+#### Model parameters
+
+1. batch_size: choose according to the model and GPU utilization; batch_size=256 is recommended for cnn/bow, and batch_size=16 for gru/bigru.
+2. padding_size: defaults to 150; every example is padded or truncated to this length (a sketch of this step follows this file's diff).
+3. epoch: defaults to 50 for training and to 1 for inference.
+4. learning_rate: defaults to 0.002.
+
+
 ## Advanced usage
 
 #### Task definition
 
 Traditional sentiment classification relies on lexicons or feature engineering, which requires tedious manual feature design and prior knowledge, stays at a shallow level of understanding, and generalizes poorly. To avoid these limitations, we adopt deep learning, which has advanced rapidly in recent years. Deep-learning-based sentiment classification does not depend on hand-crafted features: it understands the input text end to end and judges sentiment polarity from the learned semantic representation.
 
+
 #### Model overview
 
 For the sentiment polarity classification task, this project provides the following models:
 + CNN (Convolutional Neural Network): a basic sequence model that handles variable-length input and extracts features within local regions;
++ BOW (Bag of Words): a non-sequence model with a simple fully connected structure;
++ GRU (Gated Recurrent Unit): a sequence model that handles long-distance dependencies in sequential text well;
++ BI-GRU (Bidirectional Gated Recurrent Unit): a sequence model that uses a bidirectional, two-layer GRU structure to better capture the semantic features of a sentence;
 
 #### Data format
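Before moving on to the code changes, the padding behavior behind `padding_size` (parameter 2 above) can be made concrete. The sketch below is a self-contained NumPy rendering of the pad-or-truncate step that `main.py` applies to each example; the helper name `pad_or_truncate` is ours, and the pad id equals `vocab_size`, exactly as in the `np.pad` calls further down in this patch:

```python
import numpy as np

def pad_or_truncate(token_ids, padding_size=150, pad_id=33256):
    """Clip a token-id list to padding_size, then right-pad with pad_id.

    Mirrors the np.pad call in main.py: pad_id == vocab_size, one past the
    last real vocabulary entry, so padding can be masked out later.
    """
    clipped = token_ids[:padding_size]
    return np.pad(clipped, (0, padding_size - len(clipped)),
                  'constant', constant_values=pad_id)

# Example: a 3-token sentence becomes a fixed-length row of 150 ids.
row = pad_or_truncate([11, 7, 42])
assert row.shape == (150,) and row[3] == 33256
```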
diff --git a/dygraph/sentiment/main.py b/dygraph/sentiment/main.py
index a9d327cb..1502a4e6 100755
--- a/dygraph/sentiment/main.py
+++ b/dygraph/sentiment/main.py
@@ -28,8 +28,8 @@
 model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
 model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints")
 train_g = ArgumentGroup(parser, "training", "training options.")
-train_g.add_arg("epoch", int, 10, "Number of epochs for training.")
-train_g.add_arg("save_steps", int, 1000,
+train_g.add_arg("epoch", int, 50, "Number of epochs for training.")
+train_g.add_arg("save_steps", int, 200,
                "The steps interval to save checkpoints.")
 train_g.add_arg("validation_steps", int, 200,
                "The steps interval to evaluate model performance.")
@@ -47,7 +47,7 @@
 data_g.add_arg("data_dir", str, "./senta_data/", "Path to training data.")
 data_g.add_arg("vocab_path", str, "./senta_data/word_dict.txt",
                "Vocabulary path.")
 data_g.add_arg("vocab_size", int, 33256, "Vocabulary size.")
-data_g.add_arg("batch_size", int, 16,
+data_g.add_arg("batch_size", int, 256,
                "Total examples' number in batch for training.")
 data_g.add_arg("random_seed", int, 0, "Random seed.")
@@ -56,7 +56,7 @@
 run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
 run_type_g.add_arg("do_train", bool, True, "Whether to perform training.")
 run_type_g.add_arg("do_val", bool, True, "Whether to perform evaluation.")
 run_type_g.add_arg("do_infer", bool, False, "Whether to perform inference.")
-run_type_g.add_arg("profile_steps", int, 15000,
+run_type_g.add_arg("profile_steps", int, 60000,
                    "The steps interval to record the performance.")
 train_g.add_arg("model_type", str, "bow_net", "Model type of training.")
 parser.add_argument("--ce", action="store_true", help="run ce")
@@ -82,7 +82,6 @@ def profile_context(profile=True):
     else:
         yield
 
-
 if args.ce:
     print("ce mode")
     seed = 90
@@ -144,10 +143,11 @@ def train():
     elif args.model_type == 'bigru_net':
         model = nets.BiGRU("bigru_net", args.vocab_size, args.batch_size,
-                            args.padding_size)
+                           args.padding_size)
     sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
     steps = 0
     total_cost, total_acc, total_num_seqs = [], [], []
+    gru_hidden_data = np.zeros((args.batch_size, 128), dtype='float32')
     for eop in range(args.epoch):
         time_begin = time.time()
         for batch_id, data in enumerate(train_data_generator()):
@@ -162,7 +162,7 @@ def train():
                     'constant',
                     constant_values=(args.vocab_size)) for x in data
-            ]).astype('int64').reshape(-1))
+            ]).astype('int64').reshape(-1, 1))
             label = to_variable(
                 np.array([x[1] for x in data]).astype('int64').reshape(
                     args.batch_size, 1))
@@ -176,7 +176,7 @@ def train():
             total_cost.append(avg_cost.numpy() * word_num)
             total_acc.append(acc.numpy() * word_num)
             total_num_seqs.append(word_num)
-
+
             if steps % args.skip_steps == 0:
                 time_end = time.time()
                 used_time = time_end - time_begin
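A substantive fix in the hunks above is feeding token ids to the model with shape `(batch * seq_len, 1)` rather than as a flat vector (`reshape(-1, 1)` instead of `reshape(-1)`). Below is a minimal NumPy sketch of the resulting shape flow; the sizes are illustrative, and the final regrouping mirrors the `reshape(emb, shape=[batch_size, -1, hid_dim])` calls in nets.py:

```python
import numpy as np

batch_size, padding_size, hid_dim = 4, 150, 128

# After padding, a batch is conceptually (batch_size, padding_size) ...
ids = np.zeros((batch_size, padding_size), dtype='int64')

# ... but the model consumes a column of ids, one row per token,
# which is what the patch changes reshape(-1) into:
ids_col = ids.reshape(-1, 1)
assert ids_col.shape == (batch_size * padding_size, 1)

# Inside the nets, embedded tokens are regrouped per sentence again:
emb = np.zeros((ids_col.shape[0], hid_dim), dtype='float32')
emb_seq = emb.reshape(batch_size, -1, hid_dim)
assert emb_seq.shape == (batch_size, padding_size, hid_dim)
```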
@@ -193,6 +193,7 @@ def train():
                     total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                     model.eval()
                     eval_steps = 0
+                    gru_hidden_data = np.zeros((args.batch_size, 128), dtype='float32')
                     for eval_batch_id, eval_data in enumerate(
                             eval_data_generator()):
                         eval_np_doc = np.array([
@@ -206,7 +207,7 @@ def train():
                         eval_label = to_variable(
                             np.array([x[1] for x in eval_data]).astype(
                                 'int64').reshape(args.batch_size, 1))
-                        eval_doc = to_variable(eval_np_doc.reshape(-1))
+                        eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
                         eval_avg_cost, eval_prediction, eval_acc = model(
                             eval_doc, eval_label)
                         eval_np_mask = (
@@ -239,7 +240,7 @@ def train():
                                 np.sum(total_eval_num_seqs)))
 
                 if steps % args.save_steps == 0:
-                    save_path = "save_dir_" + str(steps)
+                    save_path = args.checkpoints + "/" + "save_dir_" + str(steps)
                     print('save model to: ' + save_path)
                     fluid.dygraph.save_dygraph(model.state_dict(), save_path)
@@ -270,25 +271,25 @@ def infer():
         model_infer = nets.GRU("gru_net", args.vocab_size, args.batch_size,
                                args.padding_size)
     elif args.model_type == 'bigru_net':
-        model_infer = nets.BiGRU("bigru_net", args.vocab_size,
-                                 args.batch_size, args.padding_size)
+        model_infer = nets.BiGRU("bigru_net", args.vocab_size, args.batch_size,
+                                 args.padding_size)
     print('Do inferring ...... ')
-    total_acc, total_num_seqs = [], []
     restore, _ = fluid.load_dygraph(args.checkpoints)
-    cnn_net_infer.set_dict(restore)
-    cnn_net_infer.eval()
-
+    model_infer.set_dict(restore)
+    model_infer.eval()
+    total_acc, total_num_seqs = [], []
     steps = 0
     time_begin = time.time()
     for batch_id, data in enumerate(infer_data_generator()):
         steps += 1
-        np_doc = np.array([
-            np.pad(x[0][0:args.padding_size],
-                   (0, args.padding_size - len(x[0][0:args.padding_size])),
-                   'constant',
-                   constant_values=(args.vocab_size)) for x in data
-        ]).astype('int64').reshape(-1, 1)
-        doc = to_variable(np_doc)
+        np_doc = np.array([np.pad(x[0][0:args.padding_size],
+                                  (0, args.padding_size -
+                                   len(x[0][0:args.padding_size])),
+                                  'constant',
+                                  constant_values=(args.vocab_size))
+                           for x in data]).astype('int64').reshape(-1, 1)
+        doc = to_variable(np_doc.reshape(-1, 1))
         label = to_variable(
             np.array([x[1] for x in data]).astype('int64').reshape(
                 args.batch_size, 1))
@@ -297,10 +298,8 @@ def infer():
         word_num = np.sum(mask)
         total_acc.append(acc.numpy() * word_num)
         total_num_seqs.append(word_num)
-        time_end = time.time()
-        used_time = time_end - time_begin
-
+    time_end = time.time()
+    used_time = time_end - time_begin
     print("Final infer result: ave acc: %f, speed: %f steps/s" %
           (np.sum(total_acc) / np.sum(total_num_seqs), steps / used_time))
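Throughout `train()` and `infer()` above, per-batch metrics are accumulated as `value * word_num` and divided by the summed weights at the end, so batches with more real (non-pad) tokens count for more. A minimal standalone sketch of that bookkeeping, with made-up batch values:

```python
import numpy as np

# (per-batch accuracy, number of non-pad tokens in the batch) pairs
batches = [(0.75, 900.0), (0.90, 1200.0), (0.80, 600.0)]

total_acc, total_num_seqs = [], []
for acc, word_num in batches:
    total_acc.append(acc * word_num)   # weight each batch by its token count
    total_num_seqs.append(word_num)

# Final weighted average, as in the "Final infer result" print above.
ave_acc = np.sum(total_acc) / np.sum(total_num_seqs)
print("ave acc: %f" % ave_acc)
```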
diff --git a/dygraph/sentiment/nets.py b/dygraph/sentiment/nets.py
index 8e732376..f5b8e91b 100755
--- a/dygraph/sentiment/nets.py
+++ b/dygraph/sentiment/nets.py
@@ -17,7 +17,7 @@
 from paddle.fluid.dygraph import GRUUnit
 from paddle.fluid.dygraph.base import to_variable
 import numpy as np
-
+
 class DynamicGRU(fluid.dygraph.Layer):
     def __init__(self,
                  scope_name,
@@ -29,7 +29,7 @@
                  candidate_activation='tanh',
                  h_0=None,
                  origin_mode=False,
-                 init_size=None):
+                 init_size = None):
         super(DynamicGRU, self).__init__(scope_name)
         self.gru_unit = GRUUnit(
             self.full_name(),
@@ -42,26 +42,22 @@
         self.size = size
         self.h_0 = h_0
         self.is_reverse = is_reverse
-
     def forward(self, inputs):
         hidden = self.h_0
         res = []
         for i in range(inputs.shape[1]):
             if self.is_reverse:
                 i = inputs.shape[1] - 1 - i
-            input_ = inputs[:, i:i + 1, :]
-            input_ = fluid.layers.reshape(
-                input_, [-1, input_.shape[2]], inplace=False)
+            input_ = inputs[ :, i:i+1, :]
+            input_ = fluid.layers.reshape(input_, [-1, input_.shape[2]], inplace=False)
             hidden, reset, gate = self.gru_unit(input_, hidden)
-            hidden_ = fluid.layers.reshape(
-                hidden, [-1, 1, hidden.shape[1]], inplace=False)
+            hidden_ = fluid.layers.reshape(hidden, [-1, 1, hidden.shape[1]], inplace=False)
             res.append(hidden_)
         if self.is_reverse:
             res = res[::-1]
         res = fluid.layers.concat(res, axis=1)
         return res
-
 class SimpleConvPool(fluid.dygraph.Layer):
     def __init__(self,
                  name_scope,
@@ -111,10 +107,9 @@ class CNN(fluid.dygraph.Layer):
         self._fc_prediction = FC(self.full_name(),
                                  size=self.class_dim,
                                  act="softmax")
-
     def forward(self, inputs, label=None):
         emb = self.embedding(inputs)
-        o_np_mask = (np.expand_dims(inputs.numpy(), -1) != self.dict_dim).astype('float32')
+        o_np_mask = (inputs.numpy() != self.dict_dim).astype('float32')
         mask_emb = fluid.layers.expand(
             to_variable(o_np_mask), [1, self.hid_dim])
         emb = emb * mask_emb
@@ -147,20 +142,19 @@ class BOW(fluid.dygraph.Layer):
             size=[self.dict_dim + 1, self.emb_dim],
             dtype='float32',
             is_sparse=False)
-        self._fc1 = FC(self.full_name(), size=self.fc_hid_dim, act="tanh")
-        self._fc2 = FC(self.full_name(), size=self.class_dim, act="tanh")
+        self._fc1 = FC(self.full_name(), size=self.hid_dim, act="tanh")
+        self._fc2 = FC(self.full_name(), size=self.fc_hid_dim, act="tanh")
         self._fc_prediction = FC(self.full_name(),
                                  size=self.class_dim,
                                  act="softmax")
-
     def forward(self, inputs, label=None):
         emb = self.embedding(inputs)
-        o_np_mask = (np.expand_dims(inputs.numpy(), -1) != self.dict_dim).astype('float32')
+        o_np_mask = (inputs.numpy() != self.dict_dim).astype('float32')
         mask_emb = fluid.layers.expand(
             to_variable(o_np_mask), [1, self.hid_dim])
         emb = emb * mask_emb
         emb = fluid.layers.reshape(
-            emb, shape=[-1, 1, self.seq_len, self.hid_dim])
+            emb, shape=[-1, self.seq_len, self.hid_dim])
         bow_1 = fluid.layers.reduce_sum(emb, dim=1)
         bow_1 = fluid.layers.tanh(bow_1)
         fc_1 = self._fc1(bow_1)
@@ -193,26 +187,22 @@ class GRU(fluid.dygraph.Layer):
             is_sparse=False)
         h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
         h_0 = to_variable(h_0)
-        self._fc1 = FC(self.full_name(),
-                       size=self.hid_dim * 3,
-                       num_flatten_dims=2)
+        self._fc1 = FC(self.full_name(), size=self.hid_dim*3, num_flatten_dims=2)
         self._fc2 = FC(self.full_name(), size=self.fc_hid_dim, act="tanh")
         self._fc_prediction = FC(self.full_name(),
                                  size=self.class_dim,
                                  act="softmax")
-        self._gru = DynamicGRU(self.full_name(), size=self.hid_dim, h_0=h_0)
-
+        self._gru = DynamicGRU(self.full_name(), size= self.hid_dim, h_0=h_0)
     def forward(self, inputs, label=None):
         emb = self.embedding(inputs)
-        o_np_mask = to_variable(
-            np.expand_dims(inputs.numpy(), -1) != self.dict_dim).astype('float32')
+        o_np_mask = to_variable(inputs.numpy() != self.dict_dim).astype('float32')
         mask_emb = fluid.layers.expand(
             to_variable(o_np_mask), [1, self.hid_dim])
         emb = emb * mask_emb
-        emb = fluid.layers.reshape(
-            emb, shape=[self.batch_size, -1, self.hid_dim])
+        emb = fluid.layers.reshape(emb, shape=[self.batch_size, -1, self.hid_dim])
         fc_1 = self._fc1(emb)
         gru_hidden = self._gru(fc_1)
+        gru_hidden = fluid.layers.reduce_max(gru_hidden, dim=1)
         tanh_1 = fluid.layers.tanh(gru_hidden)
         fc_2 = self._fc2(tanh_1)
         prediction = self._fc_prediction(fc_2)
@@ -224,7 +214,7 @@ class GRU(fluid.dygraph.Layer):
         else:
             return prediction
 
-
+
 class BiGRU(fluid.dygraph.Layer):
     def __init__(self, name_scope, dict_dim, batch_size, seq_len):
         super(BiGRU, self).__init__(name_scope)
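Each `forward` above compares the input ids against `dict_dim` (the pad id) and multiplies the embeddings by the resulting 0/1 mask, zeroing the vectors at padded positions. A NumPy-only sketch of that trick, with illustrative dimensions standing in for `fluid.layers.expand` and the embedding lookup:

```python
import numpy as np

dict_dim, hid_dim = 33256, 128   # pad id == vocabulary size
ids = np.array([[5], [9], [dict_dim], [dict_dim]], dtype='int64')

# 1.0 for real tokens, 0.0 for padding -- shape (tokens, 1).
mask = (ids != dict_dim).astype('float32')

# Broadcasting plays the role of fluid.layers.expand across hid_dim.
emb = np.random.rand(ids.shape[0], hid_dim).astype('float32')
masked_emb = emb * mask          # rows for pad ids become all-zero

assert masked_emb[2].sum() == 0.0 and masked_emb[3].sum() == 0.0
```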
@@ -243,27 +233,21 @@ class BiGRU(fluid.dygraph.Layer):
             is_sparse=False)
         h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
         h_0 = to_variable(h_0)
-        self._fc1 = FC(self.full_name(),
-                       size=self.hid_dim * 3,
-                       num_flatten_dims=2)
+        self._fc1 = FC(self.full_name(), size=self.hid_dim*3, num_flatten_dims=2)
         self._fc2 = FC(self.full_name(), size=self.fc_hid_dim, act="tanh")
         self._fc_prediction = FC(self.full_name(),
                                  size=self.class_dim,
                                  act="softmax")
-        self._gru_forward = DynamicGRU(
-            self.full_name(), size=self.hid_dim, h_0=h_0, is_reverse=False)
-        self._gru_backward = DynamicGRU(
-            self.full_name(), size=self.hid_dim, h_0=h_0, is_reverse=True)
+        self._gru_forward = DynamicGRU(self.full_name(), size= self.hid_dim, h_0=h_0, is_reverse=False)
+        self._gru_backward = DynamicGRU(self.full_name(), size= self.hid_dim, h_0=h_0, is_reverse=True)
 
     def forward(self, inputs, label=None):
         emb = self.embedding(inputs)
-        o_np_mask = to_variable(
-            np.expand_dims(inputs.numpy(), -1) != self.dict_dim).astype('float32')
+        o_np_mask = to_variable(inputs.numpy() != self.dict_dim).astype('float32')
         mask_emb = fluid.layers.expand(
             to_variable(o_np_mask), [1, self.hid_dim])
         emb = emb * mask_emb
-        emb = fluid.layers.reshape(
-            emb, shape=[self.batch_size, -1, self.hid_dim])
+        emb = fluid.layers.reshape(emb, shape=[self.batch_size, -1, self.hid_dim])
         fc_1 = self._fc1(emb)
         gru_forward = self._gru_forward(fc_1)
         gru_backward = self._gru_backward(fc_1)
-- 
GitLab
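As a closing note on the `DynamicGRU` layer these hunks reformat: it unrolls a GRU cell one time step at a time along axis 1, optionally walking the sequence back to front and then restoring chronological order, which is how `BiGRU` obtains its backward direction. The sketch below mirrors that control flow in plain NumPy; `toy_cell` is a stand-in for the real `GRUUnit`, and all sizes are illustrative:

```python
import numpy as np

def toy_cell(x, h):
    # Stand-in for GRUUnit: any recurrent update suffices for the demo.
    return np.tanh(x + h)

def dynamic_gru(inputs, h_0, is_reverse=False):
    """Unroll over axis 1 of (batch, seq_len, hid), as DynamicGRU.forward does."""
    hidden, res = h_0, []
    for i in range(inputs.shape[1]):
        if is_reverse:                  # walk the steps back to front
            i = inputs.shape[1] - 1 - i
        hidden = toy_cell(inputs[:, i, :], hidden)
        res.append(hidden[:, None, :])  # keep a unit time axis per step
    if is_reverse:
        res = res[::-1]                 # restore chronological order
    return np.concatenate(res, axis=1)  # (batch, seq_len, hid)

x = np.random.rand(2, 5, 8).astype('float32')
h0 = np.zeros((2, 8), dtype='float32')
fwd = dynamic_gru(x, h0)
bwd = dynamic_gru(x, h0, is_reverse=True)
assert fwd.shape == bwd.shape == (2, 5, 8)
```

BiGRU then runs one forward and one reversed instance over the same projected inputs and concatenates the two output sequences along the hidden dimension.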