diff --git a/modules/text/sentiment_analysis/emotion_detection_textcnn/README.md b/modules/text/sentiment_analysis/emotion_detection_textcnn/README.md index 7cc694495cc431604624b78b0986dbdbb40f198a..c8624b3735ec2b420b562f100b914c6703f855ae 100644 --- a/modules/text/sentiment_analysis/emotion_detection_textcnn/README.md +++ b/modules/text/sentiment_analysis/emotion_detection_textcnn/README.md @@ -1,7 +1,7 @@ # emotion_detection_textcnn |模型名称|emotion_detection_textcnn| -| :--- | :---: | +| :--- | :---: | |类别|文本-情感分析| |网络|TextCNN| |数据集|百度自建数据集| @@ -25,7 +25,7 @@ - ### 1、环境依赖 - paddlepaddle >= 1.8.0 - + - paddlehub >= 1.8.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -51,12 +51,12 @@ - ```shell $ hub run emotion_detection_textcnn --input_file test.txt ``` - + - test.txt 存放待预测文本, 如: > 这家餐厅很好吃 - + > 这部电影真的很差劲 - + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - ### 2、预测代码示例 @@ -75,12 +75,12 @@ print(result['positive_probs']) print(result['neutral_probs']) print(result['negative_probs']) - + # 今天天气真好 2 positive 0.9267 0.0714 0.0019 # 湿纸巾是干垃圾 1 neutral 0.0062 0.9896 0.0042 # 别来吵我 0 negative 0.0732 0.1477 0.7791 ``` - + - ### 3、API - ```python @@ -184,7 +184,11 @@ * 1.2.0 模型升级,支持用于文本分类,文本匹配等各种任务迁移学习 - + +* 1.3.0 + + 移除 Fluid API + - ```shell - $ hub install emotion_detection_textcnn==1.2.0 + $ hub install emotion_detection_textcnn==1.3.0 ``` diff --git a/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py b/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py index bfe7d54f85963f6b054e3c89e564a8fdee511969..acfc7be510b97a814bd53553830fd3698ed25f12 100644 --- a/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py +++ b/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py @@ -1,28 +1,22 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import math import os -import paddle.fluid as fluid import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo, serving - -from emotion_detection_textcnn.net import textcnn_net -from emotion_detection_textcnn.processor import load_vocab, preprocess, postprocess - - -@moduleinfo( - name="emotion_detection_textcnn", - version="1.2.0", - summary="Baidu's open-source Emotion Detection Model(TextCNN).", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") +from .processor import load_vocab +from .processor import postprocess +from .processor import preprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import serving + + +@moduleinfo(name="emotion_detection_textcnn", + version="1.3.0", + summary="Baidu's open-source Emotion Detection Model(TextCNN).", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") class EmotionDetectionTextCNN(hub.NLPPredictionModule): + def _initialize(self): """ initialize with the necessary elements @@ -45,111 +39,6 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule): self._word_seg_module = hub.Module(name="lac") return self._word_seg_module - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained emotion_detection_textcnn - - Args: - trainable(bool): Whether fine-tune the pretrained parameters of emotion_detection_textcnn or not. - max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. - num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of emotion_detection_textcnn (words) - outputs(dict): the output variables of input words (word embeddings and label probilities); - the sentence embedding and sequence length of the first input text. - main_program(Program): the main_program of emotion_detection_textcnn with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0) - seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0) - seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) - - # Add embedding layer. - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable) - dict_dim = 240466 - emb_1 = fluid.layers.embedding( - input=text_1, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - # Add lstm layer. - pred, fc = textcnn_net(emb_1, seq_len_used) - pred_name = pred.name - fc_name = fc.name - - if num_slots > 1: - text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], - list(main_program.global_block().vars.keys())) - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) - - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # Load the emotion_detection_textcnn pretrained model. - def if_exist(var): - return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {'seq_len': seq_len} - outputs = { - "class_probs": main_program.global_block().vars[prefix_name + pred_name], - "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] - } - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + - emb_name_list[index]] - return inputs, outputs, main_program - @serving def emotion_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): """ @@ -208,22 +97,3 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule): """ self.labels = {"positive": 2, "negative": 0, "neutral": 1} return self.labels - - -if __name__ == "__main__": - emotion_detection_textcnn = EmotionDetectionTextCNN() - inputs, outputs, main_program = emotion_detection_textcnn.context(num_slots=3) - print(inputs) - print(outputs) - # Data to be predicted - test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"] - - input_dict = {"text": test_text} - results = emotion_detection_textcnn.emotion_classify(data=input_dict, batch_size=2) - for result in results: - print(result['text']) - print(result['emotion_label']) - print(result['emotion_key']) - print(result['positive_probs']) - print(result['negative_probs']) - print(result['neutral_probs']) diff --git a/modules/text/sentiment_analysis/emotion_detection_textcnn/net.py b/modules/text/sentiment_analysis/emotion_detection_textcnn/net.py deleted file mode 100644 index 5be0d9ea759f93eeefc9a87eef283546422f1026..0000000000000000000000000000000000000000 --- a/modules/text/sentiment_analysis/emotion_detection_textcnn/net.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- coding:utf-8 -*- -import paddle.fluid as fluid - - -def textcnn_net(emb, seq_len, emb_dim=128, hid_dim=128, hid_dim2=96, class_dim=3, win_sizes=None): - """ - Textcnn_net - """ - if win_sizes is None: - win_sizes = [1, 2, 3] - - # unpad the token_feature - unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) - - # convolution layer - convs = [] - for win_size in win_sizes: - conv_h = fluid.nets.sequence_conv_pool( - input=unpad_feature, num_filters=hid_dim, filter_size=win_size, act="tanh", pool_type="max") - convs.append(conv_h) - convs_out = fluid.layers.concat(input=convs, axis=1) - - # full connect layer - fc_1 = fluid.layers.fc(input=[convs_out], size=hid_dim2, act="tanh") - # softmax layer - prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax") - - return prediction, fc_1 diff --git a/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py b/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py index 10bd655af5cac809b51b2e10ad8325f9272a90e5..02d090ec98f6115f1d3032a1cfa92180347488c5 100644 --- a/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py +++ b/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py @@ -1,5 +1,5 @@ -# -*- coding:utf-8 -*- import io + import numpy as np