diff --git a/modules/text/text_review/porn_detection_gru/README.md b/modules/text/text_review/porn_detection_gru/README.md index 46ba978316b494116319d82f004b4b4259327b5b..38412e9619aa5876676b52bc2cf9b3889d8dbe26 100644 --- a/modules/text/text_review/porn_detection_gru/README.md +++ b/modules/text/text_review/porn_detection_gru/README.md @@ -22,7 +22,7 @@ - ### 1、环境依赖 - paddlepaddle >= 1.6.2 - + - paddlehub >= 1.6.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -42,45 +42,45 @@ - ```shell $ hub run porn_detection_gru --input_text "黄片下载" ``` - + - 或者 - ```shell $ hub run porn_detection_gru --input_file test.txt ``` - + - 其中test.txt存放待审查文本,每行仅放置一段待审核文本 - + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - ### 2、预测代码示例 - ```python import paddlehub as hub - + porn_detection_gru = hub.Module(name="porn_detection_gru") - + test_text = ["黄片下载", "打击黄牛党"] - + results = porn_detection_gru.detection(texts=test_text, use_gpu=True, batch_size=1) # 如不使用GPU,请修改为use_gpu=False - + for index, text in enumerate(test_text): results[index]["text"] = text for index, result in enumerate(results): print(results[index]) - + # 输出结果如下: # {'text': '黄片下载', 'porn_detection_label': 1, 'porn_detection_key': 'porn', 'porn_probs': 0.9324, 'not_porn_probs': 0.0676} # {'text': '打击黄牛党', 'porn_detection_label': 0, 'porn_detection_key': 'not_porn', 'porn_probs': 0.0004, 'not_porn_probs': 0.9996} ``` - + - ### 3、API - ```python def detection(texts=[], data={}, use_gpu=False, batch_size=1) ``` - + - porn_detection_gru预测接口,鉴定输入句子是否包含色情文案 - **参数** @@ -146,7 +146,7 @@ - ```python import requests import json - + # 待预测数据 text = ["黄片下载", "打击黄牛党"] @@ -179,7 +179,11 @@ 大幅提升预测性能,同时简化接口使用 +* 1.2.0 + + 移除 Fluid API + - ```shell - $ hub install porn_detection_gru==1.1.0 + $ hub install porn_detection_gru==1.2.0 ``` - + diff --git a/modules/text/text_review/porn_detection_gru/README_en.md 
b/modules/text/text_review/porn_detection_gru/README_en.md index 3a8446fad72920a5318888ddba9aea19cd6493bf..a8c03da419d403f1b7d04ee34f16450762300bcc 100644 --- a/modules/text/text_review/porn_detection_gru/README_en.md +++ b/modules/text/text_review/porn_detection_gru/README_en.md @@ -22,7 +22,7 @@ - ### 1、Environmental dependence - paddlepaddle >= 1.6.2 - + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、Installation @@ -42,45 +42,45 @@ - ```shell $ hub run porn_detection_gru --input_text "黄片下载" ``` - + - or - ```shell $ hub run porn_detection_gru --input_file test.txt ``` - + - test.txt stores the text to be reviewed. Each line contains only one text - + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command line instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - ### 2、Prediction Code Example - ```python import paddlehub as hub - + porn_detection_gru = hub.Module(name="porn_detection_gru") - + test_text = ["黄片下载", "打击黄牛党"] - + results = porn_detection_gru.detection(texts=test_text, use_gpu=True, batch_size=1) # If you do not use GPU, please set use_gpu=False - + for index, text in enumerate(test_text): results[index]["text"] = text for index, result in enumerate(results): print(results[index]) - + # The output: # {'text': '黄片下载', 'porn_detection_label': 1, 'porn_detection_key': 'porn', 'porn_probs': 0.9324, 'not_porn_probs': 0.0676} # {'text': '打击黄牛党', 'porn_detection_label': 0, 'porn_detection_key': 'not_porn', 'porn_probs': 0.0004, 'not_porn_probs': 0.9996} ``` - + - ### 3、API - ```python def detection(texts=[], data={}, use_gpu=False, batch_size=1) ``` - + - prediction api of porn_detection_gru,to identify whether input sentences contain pornography - **Parameter** @@ -144,7 +144,7 @@ - ```python import requests import json - + # data to be predicted text = ["黄片下载", "打击黄牛党"] @@ -177,7 +177,11 @@ Improves prediction performance and simplifies 
interface usage +* 1.2.0 + + Remove Fluid API + - ```shell - $ hub install porn_detection_gru==1.1.0 + $ hub install porn_detection_gru==1.2.0 ``` - + diff --git a/modules/text/text_review/porn_detection_gru/module.py b/modules/text/text_review/porn_detection_gru/module.py index 4dd6b5aa31b92d4a9a9cb935a02d82b26fbf1735..6fa45275c3d795e253f21b4bf26a5d9f3388680c 100644 --- a/modules/text/text_review/porn_detection_gru/module.py +++ b/modules/text/text_review/porn_detection_gru/module.py @@ -1,31 +1,25 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json import math import os -import six - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import get_variable_info -from paddlehub.module.module import moduleinfo, serving -from paddlehub.reader import tokenization - -from porn_detection_gru.processor import load_vocab, preprocess, postprocess -@moduleinfo( - name="porn_detection_gru", - version="1.1.0", - summary="Baidu's open-source Porn Detection Model.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") +import paddlehub as hub +from .processor import load_vocab +from .processor import postprocess +from .processor import preprocess +from paddlehub.compat.task import tokenization +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import serving + + +@moduleinfo(name="porn_detection_gru", + version="1.2.0", + summary="Baidu's open-source Porn Detection Model.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") class PornDetectionGRU(hub.NLPPredictionModule): - def _initialize(self): + + def __init__(self): """ initialize with the necessary elements """ @@ -42,41 +36,6 @@ class PornDetectionGRU(hub.NLPPredictionModule): self._set_config() - def context(self, trainable=False): - """ - Get the input ,output and program of the pretrained porn_detection_gru - Args: 
- trainable(bool): whether fine-tune the pretrained parameters of porn_detection_gru or not - Returns: - inputs(dict): the input variables of porn_detection_gru (words) - outputs(dict): the output variables of porn_detection_gru (the sentiment prediction results) - main_program(Program): the main_program of lac with pretrained prameters - """ - place = fluid.CPUPlace() - exe = fluid.Executor(place) - program, feed_target_names, fetch_targets = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, executor=exe) - - with open(self.param_file, 'r') as file: - params_list = file.readlines() - for param in params_list: - param = param.strip() - var = program.global_block().var(param) - var_info = get_variable_info(var) - program.global_block().create_parameter( - shape=var_info['shape'], dtype=var_info['dtype'], name=var_info['name']) - - for param in program.global_block().iter_parameters(): - param.trainable = trainable - - for name, var in program.global_block().vars.items(): - if name == feed_target_names[0]: - inputs = {"words": var} - # output of sencond layer from the end prediction layer (fc-softmax) - if name == "@HUB_porn_detection_gru@layer_norm_0.tmp_2": - outputs = {"class_probs": fetch_targets[0], "sentence_feature": var} - return inputs, outputs, program - @serving def detection(self, texts=[], data={}, use_gpu=False, batch_size=1): """ @@ -134,28 +93,3 @@ class PornDetectionGRU(hub.NLPPredictionModule): """ self.labels = {"porn": 1, "not_porn": 0} return self.labels - - -if __name__ == "__main__": - porn_detection_gru = PornDetectionGRU() - porn_detection_gru.context() - # porn_detection_gru = hub.Module(name='porn_detection_gru') - test_text = ["黄片下载", "打击黄牛党"] - - results = porn_detection_gru.detection(texts=test_text) - for index, text in enumerate(test_text): - results[index]["text"] = text - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - 
print(results[index]) - input_dict = {"text": test_text} - results = porn_detection_gru.detection(data=input_dict) - for index, text in enumerate(test_text): - results[index]["text"] = text - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/modules/text/text_review/porn_detection_gru/processor.py b/modules/text/text_review/porn_detection_gru/processor.py index 1f6c8b565f53708f27735e51d6631015095f2cf6..5c29aa23e5e42aeaac343b15dd0080b973284735 100644 --- a/modules/text/text_review/porn_detection_gru/processor.py +++ b/modules/text/text_review/porn_detection_gru/processor.py @@ -1,5 +1,5 @@ -# -*- coding: utf-8 -*- import io + import numpy as np