update emotion_detection_textcnn (#2105)

751f30b8 · jm_12138 · GitHub · beec7ed2 · 751f30b8 · 751f30b8
4 changed files
--- a/modules/text/sentiment_analysis/emotion_detection_textcnn/README.md
+++ b/modules/text/sentiment_analysis/emotion_detection_textcnn/README.md
@@ -185,6 +185,10 @@
  模型升级，支持用于文本分类，文本匹配等各种任务迁移学习
+* 1.3.0
+  移除 Fluid API
  - ```shell
-    $ hub install emotion_detection_textcnn==1.2.0
+    $ hub install emotion_detection_textcnn==1.3.0
    ```
--- a/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py
+++ b/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py
-# -*- coding:utf-8 -*-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
 import math
 import os
-import paddle.fluid as fluid
 import paddlehub as hub
-from paddlehub.common.paddle_helper import add_vars_prefix
+from .processor import load_vocab
-from paddlehub.module.module import moduleinfo, serving
+from .processor import postprocess
+from .processor import preprocess
-from emotion_detection_textcnn.net import textcnn_net
+from paddlehub.module.module import moduleinfo
-from emotion_detection_textcnn.processor import load_vocab, preprocess, postprocess
+from paddlehub.module.module import serving
-@moduleinfo(
+@moduleinfo(name="emotion_detection_textcnn",
-    name="emotion_detection_textcnn",
+            version="1.3.0",
-    version="1.2.0",
            summary="Baidu's open-source Emotion Detection Model(TextCNN).",
            author="baidu-nlp",
            author_email="",
            type="nlp/sentiment_analysis")
 class EmotionDetectionTextCNN(hub.NLPPredictionModule):
    def _initialize(self):
        """
        initialize with the necessary elements
@@ -45,111 +39,6 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule):
            self._word_seg_module = hub.Module(name="lac")
        return self._word_seg_module
-    def context(self, trainable=False, max_seq_len=128, num_slots=1):
-        """
-        Get the input ,output and program of the pretrained emotion_detection_textcnn
-        Args:
-             trainable(bool): Whether fine-tune the pretrained parameters of emotion_detection_textcnn or not.
-             max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
-             num_slots(int): It's number of data inputted to the model, selectted as following options:
-                 - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task.
-                 - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise).
-                 - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise).
-        Returns:
-             inputs(dict): the input variables of emotion_detection_textcnn (words)
-             outputs(dict): the output variables of input words (word embeddings and label probilities);
-                 the sentence embedding and sequence length of the first input text.
-             main_program(Program): the main_program of emotion_detection_textcnn with pretrained prameters
-        """
-        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
-        main_program = fluid.Program()
-        startup_program = fluid.Program()
-        with fluid.program_guard(main_program, startup_program):
-            text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
-            seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
-            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])
-            # Add embedding layer.
-            w_param_attrs = fluid.ParamAttr(
-                name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
-            dict_dim = 240466
-            emb_1 = fluid.layers.embedding(
-                input=text_1,
-                size=[dict_dim, 128],
-                is_sparse=True,
-                padding_idx=dict_dim - 1,
-                dtype='float32',
-                param_attr=w_param_attrs)
-            emb_1_name = emb_1.name
-            data_list = [text_1]
-            emb_name_list = [emb_1_name]
-            # Add lstm layer.
-            pred, fc = textcnn_net(emb_1, seq_len_used)
-            pred_name = pred.name
-            fc_name = fc.name
-            if num_slots > 1:
-                text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
-                emb_2 = fluid.embedding(
-                    input=text_2,
-                    size=[dict_dim, 128],
-                    is_sparse=True,
-                    padding_idx=dict_dim - 1,
-                    dtype='float32',
-                    param_attr=w_param_attrs)
-                emb_2_name = emb_2.name
-                data_list.append(text_2)
-                emb_name_list.append(emb_2_name)
-            if num_slots > 2:
-                text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
-                emb_3 = fluid.embedding(
-                    input=text_3,
-                    size=[dict_dim, 128],
-                    is_sparse=True,
-                    padding_idx=dict_dim - 1,
-                    dtype='float32',
-                    param_attr=w_param_attrs)
-                emb_3_name = emb_3.name
-                data_list.append(text_3)
-                emb_name_list.append(emb_3_name)
-            variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
-                                    list(main_program.global_block().vars.keys()))
-            prefix_name = "@HUB_{}@".format(self.name)
-            add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)
-            for param in main_program.global_block().iter_parameters():
-                param.trainable = trainable
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            # Load the emotion_detection_textcnn pretrained model.
-            def if_exist(var):
-                return os.path.exists(os.path.join(self.pretrained_model_path, var.name))
-            fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist)
-            inputs = {'seq_len': seq_len}
-            outputs = {
-                "class_probs": main_program.global_block().vars[prefix_name + pred_name],
-                "sentence_feature": main_program.global_block().vars[prefix_name + fc_name]
-            }
-            for index, data in enumerate(data_list):
-                if index == 0:
-                    inputs['text'] = data
-                    outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]]
-                else:
-                    inputs['text_%s' % (index + 1)] = data
-                    outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name +
-                                                                                       emb_name_list[index]]
-            return inputs, outputs, main_program
    @serving
    def emotion_classify(self, texts=[], data={}, use_gpu=False, batch_size=1):
        """
@@ -208,22 +97,3 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule):
        """
        self.labels = {"positive": 2, "negative": 0, "neutral": 1}
        return self.labels
-if __name__ == "__main__":
-    emotion_detection_textcnn = EmotionDetectionTextCNN()
-    inputs, outputs, main_program = emotion_detection_textcnn.context(num_slots=3)
-    print(inputs)
-    print(outputs)
-    # Data to be predicted
-    test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]
-    input_dict = {"text": test_text}
-    results = emotion_detection_textcnn.emotion_classify(data=input_dict, batch_size=2)
-    for result in results:
-        print(result['text'])
-        print(result['emotion_label'])
-        print(result['emotion_key'])
-        print(result['positive_probs'])
-        print(result['negative_probs'])
-        print(result['neutral_probs'])
--- a/modules/text/sentiment_analysis/emotion_detection_textcnn/net.py
+++ b/modules/text/sentiment_analysis/emotion_detection_textcnn/net.py
-# -*- coding:utf-8 -*-
-import paddle.fluid as fluid
-def textcnn_net(emb, seq_len, emb_dim=128, hid_dim=128, hid_dim2=96, class_dim=3, win_sizes=None):
-    """
-    Textcnn_net
-    """
-    if win_sizes is None:
-        win_sizes = [1, 2, 3]
-    # unpad the token_feature
-    unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len)
-    # convolution layer
-    convs = []
-    for win_size in win_sizes:
-        conv_h = fluid.nets.sequence_conv_pool(
-            input=unpad_feature, num_filters=hid_dim, filter_size=win_size, act="tanh", pool_type="max")
-        convs.append(conv_h)
-    convs_out = fluid.layers.concat(input=convs, axis=1)
-    # full connect layer
-    fc_1 = fluid.layers.fc(input=[convs_out], size=hid_dim2, act="tanh")
-    # softmax layer
-    prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax")
-    return prediction, fc_1
--- a/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py
+++ b/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py
-# -*- coding:utf-8 -*-
 import io
 import numpy as np