Unverified commit 751f30b8, authored by jm_12138, committed by GitHub

update emotion_detection_textcnn (#2105)

Parent beec7ed2
# emotion_detection_textcnn
|Model Name|emotion_detection_textcnn|
| :--- | :---: |
|Category|Text - Sentiment Analysis|
|Network|TextCNN|
|Dataset|Baidu self-built dataset|
@@ -25,7 +25,7 @@
- ### 1. Environment Dependencies

  - paddlepaddle >= 1.8.0

  - paddlehub >= 1.8.0 | [How to Install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst)

- ### 2. Installation
@@ -51,12 +51,12 @@
- ```shell
  $ hub run emotion_detection_textcnn --input_file test.txt
  ```

- test.txt holds the text to be predicted, for example:

  > 这家餐厅很好吃
  > 这部电影真的很差劲

- This invokes the sentiment analysis model from the command line; for more details, see [PaddleHub Command Line Instructions](../../../../docs/docs_ch/tutorial/cmd_usage.rst)
- ### 2. Prediction Code Example
@@ -75,12 +75,12 @@
print(result['positive_probs'])
print(result['neutral_probs'])
print(result['negative_probs'])
# 今天天气真好 2 positive 0.9267 0.0714 0.0019
# 湿纸巾是干垃圾 1 neutral 0.0062 0.9896 0.0042
# 别来吵我 0 negative 0.0732 0.1477 0.7791
```
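The hunk above elides the setup lines of this example. For reference, a self-contained version consistent with the `emotion_classify` API shown later in this diff (the module-loading lines are reconstructed, not part of the diff):

```python
import paddlehub as hub

# Load the module and classify a few sentences (reconstructed setup; the
# result keys match the prints in the README fragment above).
module = hub.Module(name="emotion_detection_textcnn")
test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]
results = module.emotion_classify(texts=test_text)

for result in results:
    print(result['text'])
    print(result['emotion_label'])
    print(result['emotion_key'])
    print(result['positive_probs'])
    print(result['neutral_probs'])
    print(result['negative_probs'])
```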
- ### 3. API

- ```python
@@ -184,7 +184,11 @@
* 1.2.0

  Model upgraded; supports transfer learning for various tasks such as text classification and text matching

  - ```shell
    $ hub install emotion_detection_textcnn==1.2.0
    ```

* 1.3.0

  Removed the Fluid API

  - ```shell
    $ hub install emotion_detection_textcnn==1.3.0
    ```
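To confirm which version is active after installation, a quick check might look like this (a sketch; it assumes `hub.Module` exposes the `name` and `version` registered by the `@moduleinfo` decorator shown in the module code below):

```python
import paddlehub as hub

# Load the installed module and print its registered metadata.
module = hub.Module(name="emotion_detection_textcnn")
print(module.name, module.version)  # expected: emotion_detection_textcnn 1.3.0
```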
# -*- coding:utf-8 -*-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import os
-
-import paddle.fluid as fluid
 import paddlehub as hub
-from paddlehub.common.paddle_helper import add_vars_prefix
-from paddlehub.module.module import moduleinfo, serving
-
-from emotion_detection_textcnn.net import textcnn_net
-from emotion_detection_textcnn.processor import load_vocab, preprocess, postprocess
-
-@moduleinfo(
-    name="emotion_detection_textcnn",
-    version="1.2.0",
-    summary="Baidu's open-source Emotion Detection Model(TextCNN).",
-    author="baidu-nlp",
-    author_email="",
-    type="nlp/sentiment_analysis")
+from .processor import load_vocab
+from .processor import postprocess
+from .processor import preprocess
+from paddlehub.module.module import moduleinfo
+from paddlehub.module.module import serving
+
+@moduleinfo(name="emotion_detection_textcnn",
+            version="1.3.0",
+            summary="Baidu's open-source Emotion Detection Model(TextCNN).",
+            author="baidu-nlp",
+            author_email="",
+            type="nlp/sentiment_analysis")
class EmotionDetectionTextCNN(hub.NLPPredictionModule):
    def _initialize(self):
        """
        initialize with the necessary elements
@@ -45,111 +39,6 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule):
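            # Lazily create and cache Baidu's LAC module, which provides the
            # Chinese word segmentation used by this module.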
            self._word_seg_module = hub.Module(name="lac")
        return self._word_seg_module
    def context(self, trainable=False, max_seq_len=128, num_slots=1):
        """
        Get the input, output and program of the pretrained emotion_detection_textcnn

        Args:
            trainable(bool): Whether to fine-tune the pretrained parameters of emotion_detection_textcnn or not.
            max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
            num_slots(int): The number of data inputs fed to the model, selected from the following options:
                - 1(default): There is only one text input, e.g. the module is used for a text classification task.
                - 2: There are two text inputs, e.g. the module is used for a text matching task (point-wise).
                - 3: There are three text inputs, e.g. the module is used for a text matching task (pair-wise).

        Returns:
            inputs(dict): the input variables of emotion_detection_textcnn (words)
            outputs(dict): the output variables of the input words (word embeddings and label probabilities);
                the sentence embedding and sequence length of the first input text.
            main_program(Program): the main_program of emotion_detection_textcnn with pretrained parameters
        """
        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
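        # num_slots selects the task layout: one slot feeds a single text
        # (classification); two slots feed a text pair (point-wise matching);
        # three slots feed a pair-wise matching triple.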
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
            seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])

            # Add embedding layer.
            w_param_attrs = fluid.ParamAttr(
                name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
            dict_dim = 240466
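            # dict_dim is the vocabulary size of the pretrained embedding table;
            # the last id (dict_dim - 1) serves as the padding token below.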
            emb_1 = fluid.layers.embedding(
                input=text_1,
                size=[dict_dim, 128],
                is_sparse=True,
                padding_idx=dict_dim - 1,
                dtype='float32',
                param_attr=w_param_attrs)
            emb_1_name = emb_1.name
            data_list = [text_1]
            emb_name_list = [emb_1_name]

            # Add the textcnn layer.
            pred, fc = textcnn_net(emb_1, seq_len_used)
            pred_name = pred.name
            fc_name = fc.name
            if num_slots > 1:
                text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
                emb_2 = fluid.embedding(
                    input=text_2,
                    size=[dict_dim, 128],
                    is_sparse=True,
                    padding_idx=dict_dim - 1,
                    dtype='float32',
                    param_attr=w_param_attrs)
                emb_2_name = emb_2.name
                data_list.append(text_2)
                emb_name_list.append(emb_2_name)

            if num_slots > 2:
                text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
                emb_3 = fluid.embedding(
                    input=text_3,
                    size=[dict_dim, 128],
                    is_sparse=True,
                    padding_idx=dict_dim - 1,
                    dtype='float32',
                    param_attr=w_param_attrs)
                emb_3_name = emb_3.name
                data_list.append(text_3)
                emb_name_list.append(emb_3_name)
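            # Note: every slot above reuses w_param_attrs ("embedding_0.w_0"),
            # so all input texts share one pretrained embedding matrix.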
        variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
                                list(main_program.global_block().vars.keys()))
        prefix_name = "@HUB_{}@".format(self.name)
        add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)

        for param in main_program.global_block().iter_parameters():
            param.trainable = trainable

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)

        # Load the emotion_detection_textcnn pretrained model.
        def if_exist(var):
            return os.path.exists(os.path.join(self.pretrained_model_path, var.name))

        fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist)

        inputs = {'seq_len': seq_len}
        outputs = {
            "class_probs": main_program.global_block().vars[prefix_name + pred_name],
            "sentence_feature": main_program.global_block().vars[prefix_name + fc_name]
        }
        for index, data in enumerate(data_list):
            if index == 0:
                inputs['text'] = data
                outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]]
            else:
                inputs['text_%s' % (index + 1)] = data
                outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + emb_name_list[index]]

        return inputs, outputs, main_program
    @serving
    def emotion_classify(self, texts=[], data={}, use_gpu=False, batch_size=1):
        """
@@ -208,22 +97,3 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule):
        """
        self.labels = {"positive": 2, "negative": 0, "neutral": 1}
        return self.labels
if __name__ == "__main__":
    emotion_detection_textcnn = EmotionDetectionTextCNN()
    inputs, outputs, main_program = emotion_detection_textcnn.context(num_slots=3)
    print(inputs)
    print(outputs)

    # Data to be predicted
    test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]
    input_dict = {"text": test_text}
    results = emotion_detection_textcnn.emotion_classify(data=input_dict, batch_size=2)
    for result in results:
        print(result['text'])
        print(result['emotion_label'])
        print(result['emotion_key'])
        print(result['positive_probs'])
        print(result['negative_probs'])
        print(result['neutral_probs'])
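Because `emotion_classify` is decorated with `@serving`, the module can also be deployed as an HTTP service via `hub serving start -m emotion_detection_textcnn`. The client sketch below follows the usual PaddleHub serving conventions (endpoint path, payload keys, default port 8866), which are assumptions rather than something this diff shows:

```python
# Hedged client sketch for a service started with:
#   hub serving start -m emotion_detection_textcnn
# Endpoint path, payload keys, and port follow the common PaddleHub serving
# convention and are assumptions, not taken from this diff.
import json

import requests  # third-party HTTP client: pip install requests

data = {"texts": ["今天天气真好", "别来吵我"], "batch_size": 1}
url = "http://127.0.0.1:8866/predict/emotion_detection_textcnn"
headers = {"Content-Type": "application/json"}

r = requests.post(url=url, headers=headers, data=json.dumps(data))
print(r.json())
```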
# -*- coding:utf-8 -*-
import paddle.fluid as fluid


def textcnn_net(emb, seq_len, emb_dim=128, hid_dim=128, hid_dim2=96, class_dim=3, win_sizes=None):
    """
    Textcnn_net
    """
    if win_sizes is None:
        win_sizes = [1, 2, 3]

    # Unpad the token features back into variable-length sequences.
    unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len)

    # Convolution layers: one sequence conv + max pool per window size.
    convs = []
    for win_size in win_sizes:
        conv_h = fluid.nets.sequence_conv_pool(
            input=unpad_feature, num_filters=hid_dim, filter_size=win_size, act="tanh", pool_type="max")
        convs.append(conv_h)
    convs_out = fluid.layers.concat(input=convs, axis=1)

    # Fully connected layer.
    fc_1 = fluid.layers.fc(input=[convs_out], size=hid_dim2, act="tanh")

    # Softmax output layer.
    prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax")
    return prediction, fc_1
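The `textcnn_net` above is written against the Fluid API that version 1.3.0 removes. For orientation, a rough equivalent of this head in the Paddle 2.x `paddle.nn` API might look like the following sketch; it is illustrative only, not the code this commit ships (note it max-pools over padded positions, whereas the Fluid version unpads first):

```python
import paddle
import paddle.nn as nn
import paddle.nn.functional as F


class TextCNN(nn.Layer):
    """Sketch of textcnn_net in the Paddle 2.x API (illustrative only)."""

    def __init__(self, emb_dim=128, hid_dim=128, hid_dim2=96, class_dim=3, win_sizes=(1, 2, 3)):
        super().__init__()
        # One Conv1D per window size, mirroring sequence_conv_pool above.
        self.convs = nn.LayerList(
            [nn.Conv1D(emb_dim, hid_dim, kernel_size=w, padding="SAME") for w in win_sizes])
        self.fc_1 = nn.Linear(hid_dim * len(win_sizes), hid_dim2)
        self.fc_out = nn.Linear(hid_dim2, class_dim)

    def forward(self, emb):
        # emb: [batch_size, seq_len, emb_dim]; Conv1D expects channels first.
        x = emb.transpose([0, 2, 1])
        # tanh convolution + global max pool per window size, then concat.
        feats = [paddle.max(paddle.tanh(conv(x)), axis=-1) for conv in self.convs]
        fc_1 = paddle.tanh(self.fc_1(paddle.concat(feats, axis=1)))
        return F.softmax(self.fc_out(fc_1)), fc_1
```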