From b2853121782475cbc537a75e42cf5f5fc5be2665 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 4 Nov 2022 18:58:04 +0800 Subject: [PATCH] update porn_detection_cnn (#2102) --- .../text_review/porn_detection_cnn/README.md | 32 +++--- .../text_review/porn_detection_cnn/module.py | 98 +++---------------- .../porn_detection_cnn/processor.py | 2 +- 3 files changed, 34 insertions(+), 98 deletions(-) diff --git a/modules/text/text_review/porn_detection_cnn/README.md b/modules/text/text_review/porn_detection_cnn/README.md index 588ce206..3ed0228c 100644 --- a/modules/text/text_review/porn_detection_cnn/README.md +++ b/modules/text/text_review/porn_detection_cnn/README.md @@ -22,7 +22,7 @@ - ### 1、环境依赖 - paddlepaddle >= 1.6.2 - + - paddlehub >= 1.6.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -41,45 +41,45 @@ - ```shell $ hub run porn_detection_cnn --input_text "黄片下载" ``` - + - 或者 - ```shell $ hub run porn_detection_cnn --input_file test.txt ``` - + - 其中test.txt存放待审查文本,每行仅放置一段待审核文本 - + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - ### 2、预测代码示例 - ```python import paddlehub as hub - + porn_detection_cnn = hub.Module(name="porn_detection_cnn") - + test_text = ["黄片下载", "打击黄牛党"] - + results = porn_detection_cnn.detection(texts=test_text, use_gpu=True, batch_size=1) - + for index, text in enumerate(test_text): results[index]["text"] = text for index, result in enumerate(results): print(results[index]) - + # 输出结果如下: # {'text': '黄片下载', 'porn_detection_label': 1, 'porn_detection_key': 'porn', 'porn_probs': 0.9324, 'not_porn_probs': 0.0676} # {'text': '打击黄牛党', 'porn_detection_label': 0, 'porn_detection_key': 'not_porn', 'porn_probs': 0.0004, 'not_porn_probs': 0.9996} ``` - + - ### 3、API - ```python def detection(texts=[], data={}, use_gpu=False, batch_size=1) ``` - + - porn_detection_cnn预测接口,鉴定输入句子是否包含色情文案 - **参数** @@ -145,7 +145,7 @@ - ```python import requests import json - + # 待预测数据 text = ["黄片下载", "打击黄牛党"] @@ -177,8 +177,10 @@ 大幅提升预测性能,同时简化接口使用 +* 1.2.0 + + 移除 fluid api + - ```shell - $ hub install porn_detection_cnn==1.1.0 + $ hub install porn_detection_cnn==1.2.0 ``` - - diff --git a/modules/text/text_review/porn_detection_cnn/module.py b/modules/text/text_review/porn_detection_cnn/module.py index c8456500..1ab904ab 100644 --- a/modules/text/text_review/porn_detection_cnn/module.py +++ b/modules/text/text_review/porn_detection_cnn/module.py @@ -1,31 +1,24 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json import math import os -import six -import paddle.fluid as fluid import paddlehub as hub -from paddlehub.common.paddle_helper import get_variable_info -from paddlehub.module.module import moduleinfo, serving -from paddlehub.reader import tokenization - -from porn_detection_cnn.processor import load_vocab, preprocess, postprocess - - -@moduleinfo( - name="porn_detection_cnn", - version="1.1.0", - summary="Baidu's open-source Porn Detection Model.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") +from .processor import load_vocab +from .processor import postprocess +from .processor import preprocess +from paddlehub.compat.task import tokenization +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import serving + + +@moduleinfo(name="porn_detection_cnn", + version="1.2.0", + summary="Baidu's open-source Porn Detection Model.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") class PornDetectionCNN(hub.NLPPredictionModule): - def _initialize(self): + + def __init__(self): """ initialize with the necessary elements """ @@ -42,42 +35,6 @@ class PornDetectionCNN(hub.NLPPredictionModule): self._set_config() - def context(self, trainable=False): - """ - Get the input ,output and program of the pretrained porn_detection_cnn - Args: - trainable(bool): whether fine-tune the pretrained parameters of porn_detection_cnn or not - Returns: - inputs(dict): the input variables of porn_detection_cnn (words) - outputs(dict): the output variables of porn_detection_cnn (the sentiment prediction results) - main_program(Program): the main_program of porn_detection_cnn with pretrained prameters - """ - place = fluid.CPUPlace() - exe = fluid.Executor(place) - program, feed_target_names, fetch_targets = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, executor=exe) - - with open(self.param_file, 'r') as file: - params_list = file.readlines() - for param in params_list: - param = param.strip() - var = program.global_block().var(param) - var_info = get_variable_info(var) - - program.global_block().create_parameter( - shape=var_info['shape'], dtype=var_info['dtype'], name=var_info['name']) - - for param in program.global_block().iter_parameters(): - param.trainable = trainable - - for name, var in program.global_block().vars.items(): - if name == feed_target_names[0]: - inputs = {"words": var} - # output of sencond layer from the end prediction layer (fc-softmax) - if name == "@HUB_porn_detection_cnn@layer_norm_1.tmp_2": - outputs = {"class_probs": fetch_targets[0], "sentence_feature": var} - return inputs, outputs, program - @serving def detection(self, texts=[], data={}, use_gpu=False, batch_size=1): """ @@ -135,26 +92,3 @@ class PornDetectionCNN(hub.NLPPredictionModule): """ self.labels = {"porn": 1, "not_porn": 0} return self.labels - - -if __name__ == "__main__": - porn_detection_cnn = PornDetectionCNN() - test_text = ["黄片下载", "打击黄牛党"] - - results = porn_detection_cnn.detection(texts=test_text, batch_size=9) - for index, text in enumerate(test_text): - results[index]["text"] = text - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - input_dict = {"text": test_text} - results = porn_detection_cnn.detection(data=input_dict) - for index, text in enumerate(test_text): - results[index]["text"] = text - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/modules/text/text_review/porn_detection_cnn/processor.py b/modules/text/text_review/porn_detection_cnn/processor.py index 1f6c8b56..5c29aa23 100644 --- a/modules/text/text_review/porn_detection_cnn/processor.py +++ b/modules/text/text_review/porn_detection_cnn/processor.py @@ -1,5 +1,5 @@ -# -*- coding: utf-8 -*- import io + import numpy as np -- GitLab