Unverified commit 751f30b8, authored by jm_12138, committed by GitHub

update emotion_detection_textcnn (#2105)

Parent beec7ed2
# emotion_detection_textcnn
|Model Name|emotion_detection_textcnn|
| :--- | :---: |
|Category|Text - Sentiment Analysis|
|Network|TextCNN|
|Dataset|Baidu self-built dataset|
@@ -25,7 +25,7 @@
- ### 1. Environment Dependencies

  - paddlepaddle >= 1.8.0

  - paddlehub >= 1.8.0 | [How to Install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst)

- ### 2. Installation
@@ -51,12 +51,12 @@
- ```shell
  $ hub run emotion_detection_textcnn --input_file test.txt
  ```

- test.txt holds the text to be predicted, for example:

  > 这家餐厅很好吃
  > 这部电影真的很差劲

- This invokes the sentiment analysis model from the command line; for more details, see [PaddleHub Command Line Instructions](../../../../docs/docs_ch/tutorial/cmd_usage.rst)
- ### 2. Prediction Code Example
@@ -75,12 +75,12 @@
print(result['positive_probs'])
print(result['neutral_probs'])
print(result['negative_probs'])
# 今天天气真好 2 positive 0.9267 0.0714 0.0019
# 湿纸巾是干垃圾 1 neutral 0.0062 0.9896 0.0042
# 别来吵我 0 negative 0.0732 0.1477 0.7791
```
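The hunk above elides the setup lines of this example. For reference, a self-contained version consistent with the `emotion_classify` API shown later in this diff (the module-loading lines are reconstructed, not part of the diff):

```python
import paddlehub as hub

# Load the module and classify a few sentences (reconstructed setup; the
# result keys match the prints in the README fragment above).
module = hub.Module(name="emotion_detection_textcnn")
test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]
results = module.emotion_classify(texts=test_text)

for result in results:
    print(result['text'])
    print(result['emotion_label'])
    print(result['emotion_key'])
    print(result['positive_probs'])
    print(result['neutral_probs'])
    print(result['negative_probs'])
```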
- ### 3. API

- ```python
@@ -184,7 +184,11 @@
* 1.2.0

  Model upgraded; supports transfer learning for various tasks such as text classification and text matching

  - ```shell
    $ hub install emotion_detection_textcnn==1.2.0
    ```

* 1.3.0

  Removed the Fluid API

  - ```shell
    $ hub install emotion_detection_textcnn==1.3.0
    ```
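To confirm which version is active after installation, a quick check might look like this (a sketch; it assumes `hub.Module` exposes the `name` and `version` registered by the `@moduleinfo` decorator shown in the module code below):

```python
import paddlehub as hub

# Load the installed module and print its registered metadata.
module = hub.Module(name="emotion_detection_textcnn")
print(module.name, module.version)  # expected: emotion_detection_textcnn 1.3.0
```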
# -*- coding:utf-8 -*-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import os
-
-import paddle.fluid as fluid
 import paddlehub as hub
-from paddlehub.common.paddle_helper import add_vars_prefix
-from paddlehub.module.module import moduleinfo, serving
-
-from emotion_detection_textcnn.net import textcnn_net
-from emotion_detection_textcnn.processor import load_vocab, preprocess, postprocess
-
-@moduleinfo(
-    name="emotion_detection_textcnn",
-    version="1.2.0",
-    summary="Baidu's open-source Emotion Detection Model(TextCNN).",
-    author="baidu-nlp",
-    author_email="",
-    type="nlp/sentiment_analysis")
+from .processor import load_vocab
+from .processor import postprocess
+from .processor import preprocess
+from paddlehub.module.module import moduleinfo
+from paddlehub.module.module import serving
+
+@moduleinfo(name="emotion_detection_textcnn",
+            version="1.3.0",
+            summary="Baidu's open-source Emotion Detection Model(TextCNN).",
+            author="baidu-nlp",
+            author_email="",
+            type="nlp/sentiment_analysis")
class EmotionDetectionTextCNN(hub.NLPPredictionModule):
    def _initialize(self):
        """
        initialize with the necessary elements
@@ -45,111 +39,6 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule):
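            # Lazily create and cache Baidu's LAC module, which provides the
            # Chinese word segmentation used by this module.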
            self._word_seg_module = hub.Module(name="lac")
        return self._word_seg_module
    def context(self, trainable=False, max_seq_len=128, num_slots=1):
        """
        Get the input, output and program of the pretrained emotion_detection_textcnn

        Args:
            trainable(bool): Whether to fine-tune the pretrained parameters of emotion_detection_textcnn or not.
            max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
            num_slots(int): The number of data inputs fed to the model, selected from the following options:
                - 1(default): There is only one text input, e.g. the module is used for a text classification task.
                - 2: There are two text inputs, e.g. the module is used for a text matching task (point-wise).
                - 3: There are three text inputs, e.g. the module is used for a text matching task (pair-wise).

        Returns:
            inputs(dict): the input variables of emotion_detection_textcnn (words)
            outputs(dict): the output variables of the input words (word embeddings and label probabilities);
                the sentence embedding and sequence length of the first input text.
            main_program(Program): the main_program of emotion_detection_textcnn with pretrained parameters
        """
        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
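        # num_slots selects the task layout: one slot feeds a single text
        # (classification); two slots feed a text pair (point-wise matching);
        # three slots feed a pair-wise matching triple.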
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
            seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])

            # Add embedding layer.
            w_param_attrs = fluid.ParamAttr(
                name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
            dict_dim = 240466
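            # dict_dim is the vocabulary size of the pretrained embedding table;
            # the last id (dict_dim - 1) serves as the padding token below.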
            emb_1 = fluid.layers.embedding(
                input=text_1,
                size=[dict_dim, 128],
                is_sparse=True,
                padding_idx=dict_dim - 1,
                dtype='float32',
                param_attr=w_param_attrs)
            emb_1_name = emb_1.name
            data_list = [text_1]
            emb_name_list = [emb_1_name]

            # Add the textcnn layer.
            pred, fc = textcnn_net(emb_1, seq_len_used)
            pred_name = pred.name
            fc_name = fc.name
            if num_slots > 1:
                text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
                emb_2 = fluid.embedding(
                    input=text_2,
                    size=[dict_dim, 128],
                    is_sparse=True,
                    padding_idx=dict_dim - 1,
                    dtype='float32',
                    param_attr=w_param_attrs)
                emb_2_name = emb_2.name
                data_list.append(text_2)
                emb_name_list.append(emb_2_name)

            if num_slots > 2:
                text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
                emb_3 = fluid.embedding(
                    input=text_3,
                    size=[dict_dim, 128],
                    is_sparse=True,
                    padding_idx=dict_dim - 1,
                    dtype='float32',
                    param_attr=w_param_attrs)
                emb_3_name = emb_3.name
                data_list.append(text_3)
                emb_name_list.append(emb_3_name)
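            # Note: every slot above reuses w_param_attrs ("embedding_0.w_0"),
            # so all input texts share one pretrained embedding matrix.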
        variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
                                list(main_program.global_block().vars.keys()))
        prefix_name = "@HUB_{}@".format(self.name)
        add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)

        for param in main_program.global_block().iter_parameters():
            param.trainable = trainable

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)

        # Load the emotion_detection_textcnn pretrained model.
        def if_exist(var):
            return os.path.exists(os.path.join(self.pretrained_model_path, var.name))

        fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist)

        inputs = {'seq_len': seq_len}
        outputs = {
            "class_probs": main_program.global_block().vars[prefix_name + pred_name],
            "sentence_feature": main_program.global_block().vars[prefix_name + fc_name]
        }
        for index, data in enumerate(data_list):
            if index == 0:
                inputs['text'] = data
                outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]]
            else:
                inputs['text_%s' % (index + 1)] = data
                outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + emb_name_list[index]]

        return inputs, outputs, main_program
    @serving
    def emotion_classify(self, texts=[], data={}, use_gpu=False, batch_size=1):
        """
@@ -208,22 +97,3 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule):
        """
        self.labels = {"positive": 2, "negative": 0, "neutral": 1}
        return self.labels
if __name__ == "__main__":
    emotion_detection_textcnn = EmotionDetectionTextCNN()
    inputs, outputs, main_program = emotion_detection_textcnn.context(num_slots=3)
    print(inputs)
    print(outputs)

    # Data to be predicted
    test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]
    input_dict = {"text": test_text}
    results = emotion_detection_textcnn.emotion_classify(data=input_dict, batch_size=2)
    for result in results:
        print(result['text'])
        print(result['emotion_label'])
        print(result['emotion_key'])
        print(result['positive_probs'])
        print(result['negative_probs'])
        print(result['neutral_probs'])
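Because `emotion_classify` is decorated with `@serving`, the module can also be deployed as an HTTP service via `hub serving start -m emotion_detection_textcnn`. The client sketch below follows the usual PaddleHub serving conventions (endpoint path, payload keys, default port 8866), which are assumptions rather than something this diff shows:

```python
# Hedged client sketch for a service started with:
#   hub serving start -m emotion_detection_textcnn
# Endpoint path, payload keys, and port follow the common PaddleHub serving
# convention and are assumptions, not taken from this diff.
import json

import requests  # third-party HTTP client: pip install requests

data = {"texts": ["今天天气真好", "别来吵我"], "batch_size": 1}
url = "http://127.0.0.1:8866/predict/emotion_detection_textcnn"
headers = {"Content-Type": "application/json"}

r = requests.post(url=url, headers=headers, data=json.dumps(data))
print(r.json())
```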
# -*- coding:utf-8 -*-
import paddle.fluid as fluid


def textcnn_net(emb, seq_len, emb_dim=128, hid_dim=128, hid_dim2=96, class_dim=3, win_sizes=None):
    """
    Textcnn_net
    """
    if win_sizes is None:
        win_sizes = [1, 2, 3]

    # Unpad the token features back into variable-length sequences.
    unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len)

    # Convolution layers: one sequence conv + max pool per window size.
    convs = []
    for win_size in win_sizes:
        conv_h = fluid.nets.sequence_conv_pool(
            input=unpad_feature, num_filters=hid_dim, filter_size=win_size, act="tanh", pool_type="max")
        convs.append(conv_h)
    convs_out = fluid.layers.concat(input=convs, axis=1)

    # Fully connected layer.
    fc_1 = fluid.layers.fc(input=[convs_out], size=hid_dim2, act="tanh")

    # Softmax output layer.
    prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax")
    return prediction, fc_1
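The `textcnn_net` above is written against the Fluid API that version 1.3.0 removes. For orientation, a rough equivalent of this head in the Paddle 2.x `paddle.nn` API might look like the following sketch; it is illustrative only, not the code this commit ships (note it max-pools over padded positions, whereas the Fluid version unpads first):

```python
import paddle
import paddle.nn as nn
import paddle.nn.functional as F


class TextCNN(nn.Layer):
    """Sketch of textcnn_net in the Paddle 2.x API (illustrative only)."""

    def __init__(self, emb_dim=128, hid_dim=128, hid_dim2=96, class_dim=3, win_sizes=(1, 2, 3)):
        super().__init__()
        # One Conv1D per window size, mirroring sequence_conv_pool above.
        self.convs = nn.LayerList(
            [nn.Conv1D(emb_dim, hid_dim, kernel_size=w, padding="SAME") for w in win_sizes])
        self.fc_1 = nn.Linear(hid_dim * len(win_sizes), hid_dim2)
        self.fc_out = nn.Linear(hid_dim2, class_dim)

    def forward(self, emb):
        # emb: [batch_size, seq_len, emb_dim]; Conv1D expects channels first.
        x = emb.transpose([0, 2, 1])
        # tanh convolution + global max pool per window size, then concat.
        feats = [paddle.max(paddle.tanh(conv(x)), axis=-1) for conv in self.convs]
        fc_1 = paddle.tanh(self.fc_1(paddle.concat(feats, axis=1)))
        return F.softmax(self.fc_out(fc_1)), fc_1
```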