# coding:utf-8
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import ast
import os
import math
import six
import time
from pathlib import Path

from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.module.module import runnable, serving, moduleinfo
from paddlehub.io.parser import txt_parser
from paddlehub.compat.module.nlp_module import DataFormatError
import numpy as np
import paddle
import paddlehub as hub

@moduleinfo(
    name="Rumor_prediction",
    version="1.0.0",
    type="nlp/semantic_model",
    summary=
    "Is the input text prediction a rumor",
    author="彭兆帅,郑博培",
    author_email="1084667371@qq.com,2733821739@qq.com")
class Rumorprediction(hub.Module):
    """PaddleHub module that classifies Chinese text as rumor / non-rumor."""

    def _initialize(self):
        """
        Initialize with the necessary elements.
        """
        # Path to the inference model bundled with the module.
        self.default_pretrained_model_path = os.path.join(self.directory, "infer_model")

    def _load_vocab(self):
        """Load the character-to-id vocabulary shipped with the module.

        Returns:
            dict: mapping from single characters to integer ids,
            including an '<unk>' entry for out-of-vocabulary characters.
        """
        # The vocab file stores a Python dict literal on its first line.
        # ast.literal_eval is a safe drop-in for eval() on literal data.
        with open(os.path.join(self.directory, 'dict.txt'), 'r', encoding='utf-8') as f_data:
            return dict(ast.literal_eval(f_data.readline()))

    def Rumor(self, texts, use_gpu=False):
        """Predict whether each input text is a rumor.

        Args:
            texts (list[str]): texts to classify.
            use_gpu (bool): whether to run inference on GPU.

        Returns:
            list[dict]: one dict per input with keys 'content' (the input
            text), 'prediction' (class name) and 'probability' (score of
            the predicted class).
        """
        # Load the vocabulary once per call instead of once per text.
        vocab = self._load_vocab()

        def encode(sentence):
            # Map each character to its id, falling back to '<unk>' for
            # characters missing from the vocabulary.
            return [int(vocab[ch if ch in vocab else '<unk>']) for ch in sentence]

        data = [encode(text) for text in texts]
        # One LoD level: the length of each encoded sequence.
        base_shape = [[len(seq) for seq in data]]

        paddle.enable_static()
        place = paddle.CUDAPlace(0) if use_gpu else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
        [infer_program, feeded_var_names, target_var] = paddle.fluid.io.load_inference_model(
            dirname=self.default_pretrained_model_path, executor=exe)
        # Build a LoD tensor so the model treats each text as one sequence.
        tensor_words = paddle.fluid.create_lod_tensor(data, base_shape, place)
        # Run inference.
        result = exe.run(program=infer_program,
                         feed={feeded_var_names[0]: tensor_words},
                         fetch_list=target_var)
        # Class names; order must match the model's output layout.
        names = ['谣言', '非谣言']

        results = []
        # Pick the label with the highest probability for each input.
        for i, content in enumerate(texts):
            lab = np.argsort(result)[0][i][-1]
            results.append({
                'content': content,
                'prediction': names[lab],
                'probability': result[0][i][lab],
            })
        return results

    def add_module_config_arg(self):
        """
        Add the command config options.
        """
        self.arg_config_group.add_argument(
            '--use_gpu',
            type=ast.literal_eval,
            default=False,
            help="whether use GPU for prediction")

    def add_module_input_arg(self):
        """
        Add the command input options.
        """
        self.arg_input_group.add_argument(
            '--input_text',
            type=str,
            default=None,
            help="input_text is str")

    @runnable
    def run_cmd(self, argvs):
        """
        Run as a command: parse argvs and return the prediction results.
        """
        self.parser = argparse.ArgumentParser(
            description='Run the %s module.' % self.name,
            prog='hub run %s' % self.name,
            usage='%(prog)s',
            add_help=True)

        self.arg_input_group = self.parser.add_argument_group(
            title="Input options", description="Input data. Required")
        self.arg_config_group = self.parser.add_argument_group(
            title="Config options",
            description=
            "Run configuration for controlling module behavior, optional.")

        self.add_module_config_arg()
        self.add_module_input_arg()

        args = self.parser.parse_args(argvs)
        input_text = [args.input_text]
        results = self.Rumor(
            texts=input_text, use_gpu=args.use_gpu)

        return results