diff --git a/README.md b/README.md index 99beee72ce9efab970f1b67f5a7e187ae33e9819..4d748df6d38ed53fb489a0923594d38d0fe1aa32 100644 --- a/README.md +++ b/README.md @@ -9,21 +9,21 @@ PaddleHub是基于PaddlePaddle生态下的预训练模型管理和迁移学习 * 便捷地获取PaddlePaddle生态下的所有预训练模型,涵盖了图像分类、目标检测、词法分析、语义模型、情感分析、语言模型、视频分类、图像生成、图像分割等主流模型。 * 更多详情可查看官网:https://www.paddlepaddle.org.cn/hub * 通过PaddleHub Fine-tune API,结合少量代码即可完成**大规模预训练模型**的迁移学习,具体Demo可参考以下链接: - * [文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.3/demo/text-classification) - * [序列标注](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.3/demo/sequence-labeling) - * [多标签分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.3/demo/multi-label-classification) - * [图像分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.3/demo/image-classification) - * [检索式问答任务](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.3/demo/qa_classification) - * [回归任务](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.3/demo/sentence_similarity) - * [句子语义相似度计算](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.3/demo/sentence_similarity) - * [阅读理解任务](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.3/demo/reading-comprehension) + * [文本分类](./demo/text-classification) + * [序列标注](./demo/sequence-labeling) + * [多标签分类](./demo/multi-label-classification) + * [图像分类](./demo/image-classification) + * [检索式问答任务](./demo/qa_classification) + * [回归任务](./demo/sentence_similarity) + * [句子语义相似度计算](./demo/sentence_similarity) + * [阅读理解任务](./demo/reading-comprehension) * 支持超参优化(AutoDL Finetuner),自动调整超参数,给出效果较佳的超参数组合。 - * [PaddleHub超参优化功能AutoDL Finetuner使用示例](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.3/demo/autofinetune) + * [PaddleHub超参优化功能AutoDL Finetuner使用示例](./demo/autofinetune) * 引入『**模型即软件**』的设计理念,通过Python API或者命令行实现一键预测,更方便地应用PaddlePaddle模型库。 * [PaddleHub命令行工具介绍](https://github.com/PaddlePaddle/PaddleHub/wiki/PaddleHub%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%B7%A5%E5%85%B7) * 一键Module服务化部署 - 
HubServing * [PaddleHub-Serving一键服务部署](https://github.com/PaddlePaddle/PaddleHub/wiki/PaddleHub-Serving%E4%B8%80%E9%94%AE%E6%9C%8D%E5%8A%A1%E9%83%A8%E7%BD%B2) - * [使用示例](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.3/demo/serving) + * [使用示例](./demo/serving) ## 目录 @@ -38,17 +38,30 @@ PaddleHub是基于PaddlePaddle生态下的预训练模型管理和迁移学习 ## 安装 ### 环境依赖 -* Python==2.7 or Python>=3.5 -* PaddlePaddle>=1.6.1 +* Python==2.7 or Python>=3.5 for Linux or Mac + + **Python>=3.6 for Windows** + +* PaddlePaddle>=1.5 除上述依赖外,PaddleHub的预训练模型和预置数据集需要连接服务端进行下载,请确保机器可以正常访问网络。若本地已存在相关的数据集和预训练模型,则可以离线运行PaddleHub。 -**NOTE:** 若是出现离线运行PaddleHub错误,请更新PaddleHub 1.1.1版本之上。 +**NOTE:** +1. 若是出现离线运行PaddleHub错误,请更新PaddleHub 1.1.1版本之上。 pip安装方式如下: ```shell $ pip install paddlehub ``` +2. 下载数据集、module等,PaddleHub要求机器可以访问外网。可以使用server_check()检查本地与远端PaddleHub-Server的连接状态,使用方法如下: + +```python +import paddlehub +paddlehub.server_check() +# 如果可以连接远端PaddleHub-Server,则显示Request Hub-Server successfully. +# 如果无法连接远端PaddleHub-Server,则显示Request Hub-Server unsuccessfully. 
+``` + ## 快速体验 安装成功后,执行下面的命令,可以快速体验PaddleHub无需代码、一键预测的命令行功能: @@ -86,11 +99,14 @@ $ hub run faster_rcnn_coco2017 --input_path test_object_detection.jpg |类别|AIStudio链接| |-|-| +|ERNIE Tiny 文本分类|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/186443)| |ERNIE文本分类|[点击体验](https://aistudio.baidu.com/aistudio/projectDetail/79380)| |ERNIE序列标注|[点击体验](https://aistudio.baidu.com/aistudio/projectDetail/79377)| |ELMo文本分类|[点击体验](https://aistudio.baidu.com/aistudio/projectDetail/79400)| |senta情感分类|[点击体验](https://aistudio.baidu.com/aistudio/projectDetail/79398)| -|图像分类|[点击体验](https://aistudio.baidu.com/aistudio/projectDetail/79378)| +|图像分类| [点击体验](https://aistudio.baidu.com/aistudio/projectDetail/79378)| +|自定义数据FineTune(序列标注任务)|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/184200)| +|自定义数据FineTune(文本分类任务)|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/185121) | ## 教程 @@ -149,4 +165,4 @@ print(res) ## 更新历史 -详情参考[更新历史](https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.3/RELEASE.md) +详情参考[更新历史](./RELEASE.md) diff --git a/RELEASE.md b/RELEASE.md index 8c5cc92e386b5ebc5dc64c7411345772d8fcafaf..ca99a4caa4318c2243238e2d76f522f9b2329ab4 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,16 @@ +# `v1.4.1` + +* 修复利用Transformer类模型完成序列标注任务适配paddle1.6版本的问题 +* Windows下兼容性提升为python >= 3.6 + +# `v1.4.0` + +* 新增预训练模型ERNIE tiny +* 新增数据集:INEWS、BQ、DRCD、CMRC2018、THUCNEWS,支持ChineseGLUE(CLUE)V0 所有任务 +* 修复module与PaddlePaddle版本兼容性问题 +* 优化Hub Serving启动过程和模型加载流程,提高服务响应速度 + + # `v1.3.0` * 新增PaddleHub Serving服务部署 diff --git a/paddlehub/finetune/task/basic_task.py b/paddlehub/finetune/task/basic_task.py index d72dc568614b26884be61084ab137f752990886f..c53504e874a3634105f8ecd9311a70347bc7e141 100644 --- a/paddlehub/finetune/task/basic_task.py +++ b/paddlehub/finetune/task/basic_task.py @@ -261,6 +261,10 @@ class BasicTask(object): var = self.env.main_program.global_block().vars[var_name] var.persistable = True + # to avoid to print logger two times in result of the logger 
usage of paddle-fluid 1.6 + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + if self.is_train_phase: with fluid.program_guard(self.env.main_program, self._base_startup_program): @@ -287,7 +291,7 @@ class BasicTask(object): self.exe.run(self.env.startup_program) - # to avoid to print logger two times in result of the logger usage of paddle-fluid + # to avoid to print logger two times in result of the logger usage of paddle-fluid 1.5 for handler in logging.root.handlers[:]: logging.root.removeHandler(handler) @@ -588,6 +592,7 @@ class BasicTask(object): return self.finetune(do_eval=True) def finetune(self, do_eval=False): + # Start to finetune with self.phase_guard(phase="train"): self.init_if_necessary() diff --git a/paddlehub/finetune/task/sequence_task.py b/paddlehub/finetune/task/sequence_task.py index caa93dff7533400b3f52c873ed98d4e214dc5957..64d55a6660a5824649a1e6646aecf272bf1961b8 100644 --- a/paddlehub/finetune/task/sequence_task.py +++ b/paddlehub/finetune/task/sequence_task.py @@ -19,9 +19,12 @@ from __future__ import print_function import time from collections import OrderedDict + import numpy as np +import paddle import paddle.fluid as fluid from paddlehub.finetune.evaluate import chunk_eval, calculate_f1 +from paddlehub.common.utils import version_compare from .basic_task import BasicTask @@ -61,8 +64,12 @@ class SequenceLabelTask(BasicTask): return True def _build_net(self): - self.seq_len = fluid.layers.data( - name="seq_len", shape=[1], dtype='int64') + if version_compare(paddle.__version__, "1.6"): + self.seq_len = fluid.layers.data( + name="seq_len", shape=[-1], dtype='int64') + else: + self.seq_len = fluid.layers.data( + name="seq_len", shape=[1], dtype='int64') seq_len = fluid.layers.assign(self.seq_len) if self.add_crf: