未验证 提交 751f30b8 编写于 作者: jm_12138's avatar jm_12138 提交者: GitHub

update emotion_detection_textcnn (#2105)

上级 beec7ed2
...@@ -185,6 +185,10 @@ ...@@ -185,6 +185,10 @@
模型升级,支持用于文本分类,文本匹配等各种任务迁移学习 模型升级,支持用于文本分类,文本匹配等各种任务迁移学习
* 1.3.0
移除 Fluid API
- ```shell - ```shell
$ hub install emotion_detection_textcnn==1.2.0 $ hub install emotion_detection_textcnn==1.3.0
``` ```
# -*- coding:utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math import math
import os import os
import paddle.fluid as fluid
import paddlehub as hub import paddlehub as hub
from paddlehub.common.paddle_helper import add_vars_prefix from .processor import load_vocab
from paddlehub.module.module import moduleinfo, serving from .processor import postprocess
from .processor import preprocess
from emotion_detection_textcnn.net import textcnn_net from paddlehub.module.module import moduleinfo
from emotion_detection_textcnn.processor import load_vocab, preprocess, postprocess from paddlehub.module.module import serving
@moduleinfo( @moduleinfo(name="emotion_detection_textcnn",
name="emotion_detection_textcnn", version="1.3.0",
version="1.2.0",
summary="Baidu's open-source Emotion Detection Model(TextCNN).", summary="Baidu's open-source Emotion Detection Model(TextCNN).",
author="baidu-nlp", author="baidu-nlp",
author_email="", author_email="",
type="nlp/sentiment_analysis") type="nlp/sentiment_analysis")
class EmotionDetectionTextCNN(hub.NLPPredictionModule): class EmotionDetectionTextCNN(hub.NLPPredictionModule):
def _initialize(self): def _initialize(self):
""" """
initialize with the necessary elements initialize with the necessary elements
...@@ -45,111 +39,6 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule): ...@@ -45,111 +39,6 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule):
self._word_seg_module = hub.Module(name="lac") self._word_seg_module = hub.Module(name="lac")
return self._word_seg_module return self._word_seg_module
def context(self, trainable=False, max_seq_len=128, num_slots=1):
    """
    Build the program of the pretrained emotion_detection_textcnn and return its inputs and outputs.

    Args:
        trainable(bool): Whether to fine-tune the pretrained parameters of emotion_detection_textcnn.
        max_seq_len(int): Maximum sequence length; used as the sequence dimension of every text input.
        num_slots(int): Number of text inputs fed to the model, one of:
            - 1(default): a single text input, e.g. the module is used for text classification.
            - 2: two text inputs, e.g. the module is used for point-wise text matching.
            - 3: three text inputs, e.g. the module is used for pair-wise text matching.

    Returns:
        inputs(dict): the input variables: 'seq_len', 'text', plus 'text_2' / 'text_3' when requested.
        outputs(dict): 'class_probs' and 'sentence_feature' computed from the first text input, plus
            one embedding per text input ('emb', 'emb_2', 'emb_3').
        main_program(Program): the main program of emotion_detection_textcnn with pretrained parameters.
    """
    assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots

    main_program = fluid.Program()
    startup_program = fluid.Program()
    # NOTE(review): the indentation of this method was lost in the source this was recovered from;
    # every statement is kept inside the program guard -- confirm against the upstream file.
    with fluid.program_guard(main_program, startup_program):
        text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
        seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
        seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])

        # Add embedding layer. The same ParamAttr is reused below so that every slot
        # looks up the single "embedding_0.w_0" table.
        w_param_attrs = fluid.ParamAttr(
            name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
        dict_dim = 240466
        emb_1 = fluid.layers.embedding(
            input=text_1,
            size=[dict_dim, 128],
            is_sparse=True,
            padding_idx=dict_dim - 1,
            dtype='float32',
            param_attr=w_param_attrs)
        data_list = [text_1]
        emb_name_list = [emb_1.name]

        # Add the TextCNN layers; only the first text input is classified.
        pred, fc = textcnn_net(emb_1, seq_len_used)
        pred_name = pred.name
        fc_name = fc.name

        # Slots 2 and 3 only get an embedding lookup. The comparison mirrors the original
        # `num_slots > 1` / `num_slots > 2` checks.
        for slot in (2, 3):
            if not num_slots > slot - 1:
                break
            extra_text = fluid.data(name='text_%d' % slot, shape=[-1, max_seq_len], dtype='int64', lod_level=0)
            extra_emb = fluid.embedding(
                input=extra_text,
                size=[dict_dim, 128],
                is_sparse=True,
                padding_idx=dict_dim - 1,
                dtype='float32',
                param_attr=w_param_attrs)
            data_list.append(extra_text)
            emb_name_list.append(extra_emb.name)

        # Prefix every variable except the feed variables. A concrete list is passed
        # instead of a lazy filter object so the helper can safely test or re-iterate it.
        feed_names = ['text', 'text_2', 'text_3', "seq_len"]
        variable_names = [name for name in main_program.global_block().vars.keys() if name not in feed_names]
        prefix_name = "@HUB_{}@".format(self.name)
        add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)

        for param in main_program.global_block().iter_parameters():
            param.trainable = trainable

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)

        # Load the emotion_detection_textcnn pretrained model.
        def if_exist(var):
            # Only variables with a file of the same name in the model directory are loaded.
            return os.path.exists(os.path.join(self.pretrained_model_path, var.name))

        # The program is passed explicitly so loading does not depend on the ambient default program.
        fluid.io.load_vars(exe, self.pretrained_model_path, main_program=main_program, predicate=if_exist)

        renamed_vars = main_program.global_block().vars
        inputs = {'seq_len': seq_len}
        outputs = {
            "class_probs": renamed_vars[prefix_name + pred_name],
            "sentence_feature": renamed_vars[prefix_name + fc_name]
        }
        for index, (data, emb_name) in enumerate(zip(data_list, emb_name_list)):
            # The first slot uses the bare keys; later slots get a 1-based suffix.
            suffix = '' if index == 0 else '_%s' % (index + 1)
            inputs['text' + suffix] = data
            outputs['emb' + suffix] = renamed_vars[prefix_name + emb_name]
        return inputs, outputs, main_program
@serving @serving
def emotion_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): def emotion_classify(self, texts=[], data={}, use_gpu=False, batch_size=1):
""" """
...@@ -208,22 +97,3 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule): ...@@ -208,22 +97,3 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule):
""" """
self.labels = {"positive": 2, "negative": 0, "neutral": 1} self.labels = {"positive": 2, "negative": 0, "neutral": 1}
return self.labels return self.labels
if __name__ == "__main__":
    # Manual demo: build the 3-slot context, then classify a few sample sentences.
    module = EmotionDetectionTextCNN()
    feed_vars, fetch_vars, program = module.context(num_slots=3)
    print(feed_vars)
    print(fetch_vars)

    # Data to be predicted
    sample_texts = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]
    predictions = module.emotion_classify(data={"text": sample_texts}, batch_size=2)

    # Print the fields of each prediction in a fixed order, one per line.
    report_fields = ('text', 'emotion_label', 'emotion_key', 'positive_probs', 'negative_probs', 'neutral_probs')
    for prediction in predictions:
        for field in report_fields:
            print(prediction[field])
# -*- coding:utf-8 -*-
import paddle.fluid as fluid
def textcnn_net(emb, seq_len, emb_dim=128, hid_dim=128, hid_dim2=96, class_dim=3, win_sizes=None):
    """
    Assemble the TextCNN classifier on top of a padded embedding variable.

    Args:
        emb: Padded token embeddings.
        seq_len: Lengths passed to sequence_unpad to strip the padding.
        emb_dim(int): Not read by this function; kept for interface compatibility.
        hid_dim(int): Number of filters of every convolution branch.
        hid_dim2(int): Size of the hidden fully connected layer.
        class_dim(int): Size of the softmax output layer.
        win_sizes(list): Filter sizes, one convolution branch per entry; defaults to [1, 2, 3].

    Returns:
        tuple: (prediction, fc_1), the softmax output and the hidden fully connected output.
    """
    kernel_widths = [1, 2, 3] if win_sizes is None else win_sizes

    # Unpad the token features before the sequence convolutions.
    token_feature = fluid.layers.sequence_unpad(emb, length=seq_len)

    # One conv + max-pool branch per filter size, built in the given order.
    branches = [
        fluid.nets.sequence_conv_pool(
            input=token_feature, num_filters=hid_dim, filter_size=width, act="tanh", pool_type="max")
        for width in kernel_widths
    ]
    merged = fluid.layers.concat(input=branches, axis=1)

    # Hidden fully connected layer, followed by the softmax layer.
    hidden = fluid.layers.fc(input=[merged], size=hid_dim2, act="tanh")
    class_probs = fluid.layers.fc(input=[hidden], size=class_dim, act="softmax")
    return class_probs, hidden
# -*- coding:utf-8 -*-
import io import io
import numpy as np import numpy as np
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册