diff --git a/demo/senta/README.md b/demo/senta/README.md index ffafc02192d00b7470bf1a62151913eb346e7e4e..ff61224fd5a31f2e17d4f888b28ec2af59c47db5 100644 --- a/demo/senta/README.md +++ b/demo/senta/README.md @@ -1,10 +1,10 @@ -# senta +# PaddleHub Senta ## 关于 -本示例展示如何使用senta Module进行预测。 +本示例展示如何使用PaddleHub Senta Module进行预测。 -senta是中文情感分析模型,可以用于进行中文句子的情感分析,输出结果为`{正向/中性/负向}`中的一个,关于模型的训练细节,请查看[senta](https://github.com/baidu/senta) +Senta是百度NLP开放的中文情感分析模型,可以用于进行中文句子的情感分析,输出结果为`{正向/中性/负向}`中的一个,关于模型的结构细节,请查看[Senta](https://github.com/baidu/senta), 本示例代码选择的是Senta-BiLSTM模型。 ## 准备工作 @@ -27,18 +27,25 @@ $ pip install --upgrade paddlepaddle ## 命令行方式预测 -`infer.sh`给出了使用命令行调用Module预测的示例脚本 -通过以下命令试验下效果 +`cli_demo.sh`给出了使用命令行接口 (Command Line Interface) 调用Module预测的示例脚本 +通过以下命令体验下效果 ```shell -$ sh infer.sh +$ sh cli_demo.sh ``` ## 通过python API预测 -`infer_by_code.py`给出了使用python API调用Module预测的示例代码 +`senta_demo.py`给出了使用python API调用Module预测的示例代码 通过以下命令试验下效果 ```shell -python infer_by_code.py +python senta_demo.py +``` + +## 通过PaddleHub Finetune API微调 +`senta_finetune.py` 给出了如何使用Senta模型的句子特征进行Fine-tuning的实例代码。 +可以运行以下命令在ChnSentiCorp数据集上进行Fine-tuning. +```shell +$ sh run_finetune.sh ``` diff --git a/demo/senta/infer.sh b/demo/senta/cli_demo.sh similarity index 100% rename from demo/senta/infer.sh rename to demo/senta/cli_demo.sh diff --git a/demo/senta/infer_by_code.py b/demo/senta/infer_by_code.py deleted file mode 100644 index 1033bb7d9189603f5a3dd1acc26aafee71813944..0000000000000000000000000000000000000000 --- a/demo/senta/infer_by_code.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -import paddlehub as hub - - -def infer_with_input_text(): - # get senta module - senta = hub.Module(name="senta") - - test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - - # get the input keys for signature 'sentiment_classify' - data_format = senta.processor.data_format(sign_name='sentiment_classify') - key = list(data_format.keys())[0] - - # set input dict - input_dict = {key: test_text} - - # execute predict and print the result - results = senta.sentiment_classify(data=input_dict) - for index, result in enumerate(results): - hub.logger.info("sentence %d segmented result: %s" % - (index + 1, result['sentiment_key'])) - - -def infer_with_input_file(): - # get senta module - senta = hub.Module(name="senta") - - # get the input keys for signature 'sentiment_classify' - data_format = senta.processor.data_format(sign_name='sentiment_classify') - key = list(data_format.keys())[0] - - # parse input file - test_file = os.path.join("test", "test.txt") - test_text = hub.io.parser.txt_parser.parse(test_file) - - # set input dict - input_dict = {key: test_text} - results = senta.sentiment_classify(data=input_dict) - for index, result in enumerate(results): - hub.logger.info("sentence %d segmented result: %s" % - (index + 1, result['sentiment_key'])) - - -if __name__ == "__main__": - infer_with_input_text() diff --git a/demo/senta/run_classifier.sh b/demo/senta/run_finetune.sh similarity index 60% rename from demo/senta/run_classifier.sh rename to demo/senta/run_finetune.sh index 5fd33cf014d4d4a230a9f6c46fc0a4bb18dfdd9c..77361f696ebff07ff6f6f756a4b20fb9f35f3633 100644 --- a/demo/senta/run_classifier.sh +++ b/demo/senta/run_finetune.sh @@ -1,11 +1,10 @@ export CUDA_VISIBLE_DEVICES=0 -# User can select chnsenticorp, nlpcc_dbqa, lcqmc for different task DATASET="chnsenticorp" CKPT_DIR="./ckpt_${DATASET}" -python -u text_classifier.py \ +python -u senta_finetune.py \ --batch_size=24 \ --use_gpu=False \ --checkpoint_dir=${CKPT_DIR} \ - --num_epoch=10 + --num_epoch=3 diff --git a/demo/senta/senta_demo.py b/demo/senta/senta_demo.py new file mode 100644 index 0000000000000000000000000000000000000000..c23bc5300dc78d6012d000fc3fc89ed845acfd8a --- /dev/null +++ b/demo/senta/senta_demo.py @@ -0,0 +1,15 @@ +# coding: utf-8 +import os +import paddlehub as hub + +if __name__ == "__main__": + # Load Senta-BiLSTM module + senta = hub.Module(name="senta") + + test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] + + input_dict = {"text": test_text} + + results = senta.sentiment_classify(data=input_dict) + for index, result in enumerate(results): + print(test_text[index], result['sentiment_key']) diff --git a/demo/senta/text_classifier.py b/demo/senta/senta_finetune.py similarity index 100% rename from demo/senta/text_classifier.py rename to demo/senta/senta_finetune.py diff --git a/demo/senta/test/test.txt b/demo/senta/test/test.txt index 80c4e030fa95c22e19688e7030e7b682c006ba09..8ac5c8dfe0125b2249b0e8162b5a6e4d5e97601b 100644 --- a/demo/senta/test/test.txt +++ b/demo/senta/test/test.txt @@ -1,2 +1,2 @@ -这部电影真的很赞 -售后太差! +这家餐厅很好吃 +这部电影真的很差劲